{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.999366420274551, "global_step": 11830, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25816988945007324, "epoch": 0.0, "learning_rate": 4.999577345731192e-05, "loss": 0.2805, "step": 1, "task_loss": 0.5853821039199829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18578635156154633, "epoch": 0.0, "learning_rate": 4.9991546914623835e-05, "loss": 0.1863, "step": 2, "task_loss": 0.16780924797058105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2388908416032791, "epoch": 0.0, "learning_rate": 4.998732037193576e-05, "loss": 0.4048, "step": 3, "task_loss": 0.38245245814323425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2596370279788971, "epoch": 0.0, "learning_rate": 4.998309382924768e-05, "loss": 0.3622, "step": 4, "task_loss": 0.6480276584625244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4107702672481537, "epoch": 0.0, "learning_rate": 4.9978867286559594e-05, "loss": 0.3972, "step": 5, "task_loss": 0.26988765597343445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1514049470424652, "epoch": 0.01, "learning_rate": 4.9974640743871514e-05, "loss": 0.3625, "step": 6, "task_loss": 0.46446821093559265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28078120946884155, "epoch": 0.01, "learning_rate": 4.9970414201183434e-05, "loss": 0.3568, "step": 7, "task_loss": 0.429648220539093 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27217891812324524, "epoch": 0.01, "learning_rate": 4.9966187658495354e-05, "loss": 0.4557, "step": 8, "task_loss": 0.17257165908813477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.270322322845459, "epoch": 0.01, "learning_rate": 4.996196111580727e-05, "loss": 0.466, "step": 9, "task_loss": 0.3322319984436035 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4226338565349579, "epoch": 0.01, "learning_rate": 4.995773457311919e-05, "loss": 0.2858, "step": 10, "task_loss": 0.35513943433761597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17420129477977753, "epoch": 0.01, "learning_rate": 4.995350803043111e-05, "loss": 0.3424, "step": 11, "task_loss": 0.36419224739074707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33628472685813904, "epoch": 0.01, "learning_rate": 4.9949281487743026e-05, "loss": 0.3603, "step": 12, "task_loss": 0.4097657799720764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42833825945854187, "epoch": 0.01, "learning_rate": 4.9945054945054945e-05, "loss": 0.3342, "step": 13, "task_loss": 0.6553051471710205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3272498846054077, "epoch": 0.01, "learning_rate": 4.9940828402366865e-05, "loss": 0.29, "step": 14, "task_loss": 0.41925597190856934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31984272599220276, "epoch": 0.01, "learning_rate": 4.9936601859678785e-05, "loss": 0.4187, "step": 15, "task_loss": 0.4311681389808655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2317107617855072, "epoch": 0.01, "learning_rate": 4.9932375316990705e-05, "loss": 0.2976, "step": 16, "task_loss": 0.7428198456764221 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43023306131362915, "epoch": 0.01, "learning_rate": 4.9928148774302624e-05, "loss": 0.3232, "step": 17, "task_loss": 1.6245992183685303 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4810776710510254, "epoch": 0.02, "learning_rate": 4.992392223161454e-05, "loss": 0.5375, "step": 18, "task_loss": 0.6471912860870361 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2258974015712738, "epoch": 0.02, "learning_rate": 4.991969568892646e-05, "loss": 0.2664, "step": 19, "task_loss": 1.0436300039291382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4231599271297455, "epoch": 0.02, "learning_rate": 4.9915469146238384e-05, "loss": 0.5146, "step": 20, "task_loss": 0.9995765089988708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19174601137638092, "epoch": 0.02, "learning_rate": 4.9911242603550297e-05, "loss": 0.3358, "step": 21, "task_loss": 0.8044640421867371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44361889362335205, "epoch": 0.02, "learning_rate": 4.9907016060862216e-05, "loss": 0.3343, "step": 22, "task_loss": 0.6296488642692566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16035886108875275, "epoch": 0.02, "learning_rate": 4.9902789518174136e-05, "loss": 0.3306, "step": 23, "task_loss": 0.7133399844169617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31523415446281433, "epoch": 0.02, "learning_rate": 4.9898562975486056e-05, "loss": 0.2981, "step": 24, "task_loss": 0.15186113119125366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3411262035369873, "epoch": 0.02, "learning_rate": 4.9894336432797976e-05, "loss": 0.4549, "step": 25, "task_loss": 1.002905011177063 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30748438835144043, "epoch": 0.02, "learning_rate": 4.9890109890109895e-05, "loss": 0.5096, "step": 26, "task_loss": 0.8128278851509094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3724653124809265, "epoch": 0.02, "learning_rate": 4.9885883347421815e-05, "loss": 0.3626, "step": 27, "task_loss": 1.2241909503936768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1662927269935608, "epoch": 0.02, "learning_rate": 4.988165680473373e-05, "loss": 0.2403, "step": 28, "task_loss": 0.2341037094593048 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32311949133872986, "epoch": 0.02, "learning_rate": 4.987743026204565e-05, "loss": 0.3691, "step": 29, "task_loss": 1.0425922870635986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4696677029132843, "epoch": 0.03, "learning_rate": 4.987320371935757e-05, "loss": 0.3536, "step": 30, "task_loss": 0.5542616844177246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42097368836402893, "epoch": 0.03, "learning_rate": 4.986897717666949e-05, "loss": 0.3675, "step": 31, "task_loss": 0.59581458568573 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24710862338542938, "epoch": 0.03, "learning_rate": 4.986475063398141e-05, "loss": 0.296, "step": 32, "task_loss": 0.6669486165046692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16573220491409302, "epoch": 0.03, "learning_rate": 4.986052409129333e-05, "loss": 0.4524, "step": 33, "task_loss": 0.8582977652549744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6471076011657715, "epoch": 0.03, "learning_rate": 4.985629754860524e-05, "loss": 0.4255, "step": 34, "task_loss": 0.6770200729370117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46286115050315857, "epoch": 0.03, "learning_rate": 4.985207100591716e-05, "loss": 0.4156, "step": 35, "task_loss": 0.5535679459571838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21832047402858734, "epoch": 0.03, "learning_rate": 4.984784446322908e-05, "loss": 0.3233, "step": 36, "task_loss": 0.27768674492836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2571682929992676, "epoch": 0.03, "learning_rate": 4.9843617920541006e-05, "loss": 0.3658, "step": 37, "task_loss": 0.7903686761856079 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4108579158782959, "epoch": 0.03, "learning_rate": 4.983939137785292e-05, "loss": 0.4139, "step": 38, "task_loss": 0.3737262785434723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2741393744945526, "epoch": 0.03, "learning_rate": 4.983516483516484e-05, "loss": 0.3012, "step": 39, "task_loss": 0.5845351815223694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21137279272079468, "epoch": 0.03, "learning_rate": 4.983093829247676e-05, "loss": 0.3481, "step": 40, "task_loss": 1.2179186344146729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47420579195022583, "epoch": 0.03, "learning_rate": 4.982671174978867e-05, "loss": 0.3995, "step": 41, "task_loss": 0.5109229683876038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19544543325901031, "epoch": 0.04, "learning_rate": 4.98224852071006e-05, "loss": 0.3171, "step": 42, "task_loss": 0.26069581508636475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30108460783958435, "epoch": 0.04, "learning_rate": 4.981825866441252e-05, "loss": 0.4314, "step": 43, "task_loss": 1.0975393056869507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21767646074295044, "epoch": 0.04, "learning_rate": 4.981403212172443e-05, "loss": 0.3726, "step": 44, "task_loss": 0.4263548254966736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5715851187705994, "epoch": 0.04, "learning_rate": 4.980980557903635e-05, "loss": 0.4301, "step": 45, "task_loss": 0.7781269550323486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4327733814716339, "epoch": 0.04, "learning_rate": 4.980557903634827e-05, "loss": 0.4044, "step": 46, "task_loss": 0.6987329721450806 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2926572561264038, "epoch": 0.04, "learning_rate": 4.980135249366019e-05, "loss": 0.4134, "step": 47, "task_loss": 0.9054892659187317 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2079082876443863, "epoch": 0.04, "learning_rate": 4.979712595097211e-05, "loss": 0.2983, "step": 48, "task_loss": 0.1822921335697174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3556250035762787, "epoch": 0.04, "learning_rate": 4.979289940828403e-05, "loss": 0.333, "step": 49, "task_loss": 0.21551582217216492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2854697108268738, "epoch": 0.04, "learning_rate": 4.978867286559594e-05, "loss": 0.2963, "step": 50, "task_loss": 0.47764793038368225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3259928226470947, "epoch": 0.04, "learning_rate": 4.978444632290786e-05, "loss": 0.3551, "step": 51, "task_loss": 0.4995303153991699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22598080337047577, "epoch": 0.04, "learning_rate": 4.978021978021978e-05, "loss": 0.3064, "step": 52, "task_loss": 0.28154799342155457 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25202736258506775, "epoch": 0.04, "learning_rate": 4.97759932375317e-05, "loss": 0.3011, "step": 53, "task_loss": 0.23623007535934448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2742934226989746, "epoch": 0.05, "learning_rate": 4.977176669484362e-05, "loss": 0.333, "step": 54, "task_loss": 0.49772605299949646 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20836201310157776, "epoch": 0.05, "learning_rate": 4.976754015215554e-05, "loss": 0.3544, "step": 55, "task_loss": 1.1939759254455566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2060907930135727, "epoch": 0.05, "learning_rate": 4.976331360946746e-05, "loss": 0.3227, "step": 56, "task_loss": 0.744167685508728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1929110884666443, "epoch": 0.05, "learning_rate": 4.975908706677937e-05, "loss": 0.3204, "step": 57, "task_loss": 0.5006382465362549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44293028116226196, "epoch": 0.05, "learning_rate": 4.975486052409129e-05, "loss": 0.419, "step": 58, "task_loss": 0.31968680024147034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21261683106422424, "epoch": 0.05, "learning_rate": 4.975063398140322e-05, "loss": 0.2863, "step": 59, "task_loss": 0.16719569265842438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33205848932266235, "epoch": 0.05, "learning_rate": 4.974640743871513e-05, "loss": 0.3029, "step": 60, "task_loss": 0.36968085169792175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48490577936172485, "epoch": 0.05, "learning_rate": 4.974218089602705e-05, "loss": 0.4028, "step": 61, "task_loss": 0.7566177845001221 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22474291920661926, "epoch": 0.05, "learning_rate": 4.973795435333897e-05, "loss": 0.3067, "step": 62, "task_loss": 1.0528796911239624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3649987578392029, "epoch": 0.05, "learning_rate": 4.9733727810650885e-05, "loss": 0.3056, "step": 63, "task_loss": 0.6386075615882874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5773598551750183, "epoch": 0.05, "learning_rate": 4.9729501267962805e-05, "loss": 0.4557, "step": 64, "task_loss": 0.6312403678894043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11294424533843994, "epoch": 0.05, "learning_rate": 4.972527472527473e-05, "loss": 0.2713, "step": 65, "task_loss": 0.34500980377197266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34409666061401367, "epoch": 0.06, "learning_rate": 4.972104818258665e-05, "loss": 0.3823, "step": 66, "task_loss": 0.9285640716552734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4020559787750244, "epoch": 0.06, "learning_rate": 4.9716821639898564e-05, "loss": 0.3368, "step": 67, "task_loss": 1.0970407724380493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47366437315940857, "epoch": 0.06, "learning_rate": 4.9712595097210484e-05, "loss": 0.3769, "step": 68, "task_loss": 0.21794253587722778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4191763699054718, "epoch": 0.06, "learning_rate": 4.97083685545224e-05, "loss": 0.3188, "step": 69, "task_loss": 0.7065517902374268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5290296077728271, "epoch": 0.06, "learning_rate": 4.970414201183432e-05, "loss": 0.463, "step": 70, "task_loss": 0.6956005096435547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.419831246137619, "epoch": 0.06, "learning_rate": 4.969991546914624e-05, "loss": 0.4419, "step": 71, "task_loss": 1.0496934652328491 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2980637848377228, "epoch": 0.06, "learning_rate": 4.969568892645816e-05, "loss": 0.387, "step": 72, "task_loss": 0.4555947184562683 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28176164627075195, "epoch": 0.06, "learning_rate": 4.9691462383770076e-05, "loss": 0.3574, "step": 73, "task_loss": 0.5821244120597839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5036299228668213, "epoch": 0.06, "learning_rate": 4.9687235841081995e-05, "loss": 0.4155, "step": 74, "task_loss": 0.7105315327644348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7065041065216064, "epoch": 0.06, "learning_rate": 4.9683009298393915e-05, "loss": 0.4075, "step": 75, "task_loss": 1.288073182106018 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22645768523216248, "epoch": 0.06, "learning_rate": 4.9678782755705835e-05, "loss": 0.4971, "step": 76, "task_loss": 0.45604434609413147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22015847265720367, "epoch": 0.07, "learning_rate": 4.9674556213017755e-05, "loss": 0.2785, "step": 77, "task_loss": 0.14051614701747894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.335431843996048, "epoch": 0.07, "learning_rate": 4.9670329670329674e-05, "loss": 0.3127, "step": 78, "task_loss": 0.6418079137802124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2796747386455536, "epoch": 0.07, "learning_rate": 4.966610312764159e-05, "loss": 0.3599, "step": 79, "task_loss": 0.3722460865974426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5145119428634644, "epoch": 0.07, "learning_rate": 4.966187658495351e-05, "loss": 0.3894, "step": 80, "task_loss": 0.5264815092086792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3600746989250183, "epoch": 0.07, "learning_rate": 4.965765004226543e-05, "loss": 0.3373, "step": 81, "task_loss": 1.6320440769195557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28211796283721924, "epoch": 0.07, "learning_rate": 4.965342349957735e-05, "loss": 0.2809, "step": 82, "task_loss": 0.6417269706726074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3130403459072113, "epoch": 0.07, "learning_rate": 4.9649196956889266e-05, "loss": 0.2982, "step": 83, "task_loss": 0.25361379981040955 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3775560259819031, "epoch": 0.07, "learning_rate": 4.9644970414201186e-05, "loss": 0.3535, "step": 84, "task_loss": 0.48491528630256653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28904205560684204, "epoch": 0.07, "learning_rate": 4.9640743871513106e-05, "loss": 0.3024, "step": 85, "task_loss": 1.2618898153305054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2344900667667389, "epoch": 0.07, "learning_rate": 4.963651732882502e-05, "loss": 0.471, "step": 86, "task_loss": 0.2024116814136505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.457430899143219, "epoch": 0.07, "learning_rate": 4.9632290786136945e-05, "loss": 0.4479, "step": 87, "task_loss": 0.9474542737007141 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27561503648757935, "epoch": 0.07, "learning_rate": 4.9628064243448865e-05, "loss": 0.2618, "step": 88, "task_loss": 0.6162384748458862 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21126046776771545, "epoch": 0.08, "learning_rate": 4.962383770076078e-05, "loss": 0.2443, "step": 89, "task_loss": 0.1814127415418625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35313093662261963, "epoch": 0.08, "learning_rate": 4.96196111580727e-05, "loss": 0.3678, "step": 90, "task_loss": 0.453416109085083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4890415072441101, "epoch": 0.08, "learning_rate": 4.961538461538462e-05, "loss": 0.3357, "step": 91, "task_loss": 0.49295541644096375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7142332792282104, "epoch": 0.08, "learning_rate": 4.961115807269654e-05, "loss": 0.4798, "step": 92, "task_loss": 0.6236546635627747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3460538387298584, "epoch": 0.08, "learning_rate": 4.960693153000846e-05, "loss": 0.5107, "step": 93, "task_loss": 1.159478783607483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27619296312332153, "epoch": 0.08, "learning_rate": 4.9602704987320377e-05, "loss": 0.3218, "step": 94, "task_loss": 0.35865139961242676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6670293211936951, "epoch": 0.08, "learning_rate": 4.9598478444632296e-05, "loss": 0.5086, "step": 95, "task_loss": 0.8902304172515869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35298633575439453, "epoch": 0.08, "learning_rate": 4.959425190194421e-05, "loss": 0.4552, "step": 96, "task_loss": 1.6388323307037354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3052392303943634, "epoch": 0.08, "learning_rate": 4.959002535925613e-05, "loss": 0.2989, "step": 97, "task_loss": 0.635475754737854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.359167218208313, "epoch": 0.08, "learning_rate": 4.958579881656805e-05, "loss": 0.3379, "step": 98, "task_loss": 0.845808207988739 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1947636753320694, "epoch": 0.08, "learning_rate": 4.958157227387997e-05, "loss": 0.2466, "step": 99, "task_loss": 0.40310221910476685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5416989326477051, "epoch": 0.08, "learning_rate": 4.957734573119189e-05, "loss": 0.4943, "step": 100, "task_loss": 1.3766098022460938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34332895278930664, "epoch": 0.09, "learning_rate": 4.957311918850381e-05, "loss": 0.3289, "step": 101, "task_loss": 0.3026656210422516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28686532378196716, "epoch": 0.09, "learning_rate": 4.956889264581572e-05, "loss": 0.3582, "step": 102, "task_loss": 1.122120976448059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4475458860397339, "epoch": 0.09, "learning_rate": 4.956466610312764e-05, "loss": 0.4738, "step": 103, "task_loss": 0.8384937047958374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49370914697647095, "epoch": 0.09, "learning_rate": 4.956043956043957e-05, "loss": 0.4248, "step": 104, "task_loss": 1.3479255437850952 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4307611584663391, "epoch": 0.09, "learning_rate": 4.955621301775148e-05, "loss": 0.3323, "step": 105, "task_loss": 0.9533501863479614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1903613805770874, "epoch": 0.09, "learning_rate": 4.95519864750634e-05, "loss": 0.3374, "step": 106, "task_loss": 0.5240545868873596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2635105848312378, "epoch": 0.09, "learning_rate": 4.954775993237532e-05, "loss": 0.3491, "step": 107, "task_loss": 1.090175747871399 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25856882333755493, "epoch": 0.09, "learning_rate": 4.954353338968723e-05, "loss": 0.3718, "step": 108, "task_loss": 0.12843522429466248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5180971026420593, "epoch": 0.09, "learning_rate": 4.953930684699916e-05, "loss": 0.3546, "step": 109, "task_loss": 0.4853808581829071 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2508522570133209, "epoch": 0.09, "learning_rate": 4.953508030431108e-05, "loss": 0.4383, "step": 110, "task_loss": 0.2109997421503067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32342344522476196, "epoch": 0.09, "learning_rate": 4.9530853761623e-05, "loss": 0.4014, "step": 111, "task_loss": 0.9165175557136536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2745503783226013, "epoch": 0.09, "learning_rate": 4.952662721893491e-05, "loss": 0.3822, "step": 112, "task_loss": 0.040358565747737885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31443509459495544, "epoch": 0.1, "learning_rate": 4.952240067624683e-05, "loss": 0.3267, "step": 113, "task_loss": 0.7784364223480225 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20579645037651062, "epoch": 0.1, "learning_rate": 4.951817413355875e-05, "loss": 0.411, "step": 114, "task_loss": 0.3764180541038513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4143642485141754, "epoch": 0.1, "learning_rate": 4.951394759087067e-05, "loss": 0.3356, "step": 115, "task_loss": 0.4431930482387543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26486262679100037, "epoch": 0.1, "learning_rate": 4.950972104818259e-05, "loss": 0.3657, "step": 116, "task_loss": 0.45691344141960144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5901132822036743, "epoch": 0.1, "learning_rate": 4.950549450549451e-05, "loss": 0.4279, "step": 117, "task_loss": 0.6737844944000244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6378105878829956, "epoch": 0.1, "learning_rate": 4.950126796280642e-05, "loss": 0.4166, "step": 118, "task_loss": 0.7184115648269653 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24172967672348022, "epoch": 0.1, "learning_rate": 4.949704142011834e-05, "loss": 0.4309, "step": 119, "task_loss": 0.2585623562335968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4076937437057495, "epoch": 0.1, "learning_rate": 4.949281487743026e-05, "loss": 0.5081, "step": 120, "task_loss": 1.1405022144317627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5878621339797974, "epoch": 0.1, "learning_rate": 4.948858833474218e-05, "loss": 0.4726, "step": 121, "task_loss": 0.25736790895462036 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16709516942501068, "epoch": 0.1, "learning_rate": 4.94843617920541e-05, "loss": 0.2761, "step": 122, "task_loss": 0.5165044665336609 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22008250653743744, "epoch": 0.1, "learning_rate": 4.948013524936602e-05, "loss": 0.2773, "step": 123, "task_loss": 0.8879871964454651 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4000396728515625, "epoch": 0.1, "learning_rate": 4.947590870667794e-05, "loss": 0.375, "step": 124, "task_loss": 0.7097489237785339 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8170579671859741, "epoch": 0.11, "learning_rate": 4.9471682163989855e-05, "loss": 0.4451, "step": 125, "task_loss": 0.4997040331363678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2630847692489624, "epoch": 0.11, "learning_rate": 4.946745562130178e-05, "loss": 0.2828, "step": 126, "task_loss": 1.1715372800827026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3628414273262024, "epoch": 0.11, "learning_rate": 4.94632290786137e-05, "loss": 0.3252, "step": 127, "task_loss": 0.3841911554336548 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26722654700279236, "epoch": 0.11, "learning_rate": 4.9459002535925614e-05, "loss": 0.4783, "step": 128, "task_loss": 2.1524083614349365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44716575741767883, "epoch": 0.11, "learning_rate": 4.9454775993237533e-05, "loss": 0.4692, "step": 129, "task_loss": 1.471771240234375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6284704804420471, "epoch": 0.11, "learning_rate": 4.945054945054945e-05, "loss": 0.3347, "step": 130, "task_loss": 0.25305724143981934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2816174030303955, "epoch": 0.11, "learning_rate": 4.944632290786137e-05, "loss": 0.336, "step": 131, "task_loss": 1.7037070989608765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18777093291282654, "epoch": 0.11, "learning_rate": 4.944209636517329e-05, "loss": 0.3004, "step": 132, "task_loss": 0.07924327254295349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49237024784088135, "epoch": 0.11, "learning_rate": 4.943786982248521e-05, "loss": 0.4681, "step": 133, "task_loss": 1.0658358335494995 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40326786041259766, "epoch": 0.11, "learning_rate": 4.9433643279797125e-05, "loss": 0.4265, "step": 134, "task_loss": 1.5436773300170898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13374197483062744, "epoch": 0.11, "learning_rate": 4.9429416737109045e-05, "loss": 0.2439, "step": 135, "task_loss": 0.40028491616249084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4446583688259125, "epoch": 0.11, "learning_rate": 4.9425190194420965e-05, "loss": 0.3879, "step": 136, "task_loss": 0.6489905714988708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2626723647117615, "epoch": 0.12, "learning_rate": 4.9420963651732885e-05, "loss": 0.3771, "step": 137, "task_loss": 0.15167920291423798 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.255208283662796, "epoch": 0.12, "learning_rate": 4.9416737109044804e-05, "loss": 0.3497, "step": 138, "task_loss": 0.6664825081825256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25952035188674927, "epoch": 0.12, "learning_rate": 4.9412510566356724e-05, "loss": 0.2659, "step": 139, "task_loss": 0.13492964208126068 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3707650303840637, "epoch": 0.12, "learning_rate": 4.9408284023668644e-05, "loss": 0.2525, "step": 140, "task_loss": 0.7116761803627014 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21200241148471832, "epoch": 0.12, "learning_rate": 4.940405748098056e-05, "loss": 0.3401, "step": 141, "task_loss": 0.7733478546142578 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48759669065475464, "epoch": 0.12, "learning_rate": 4.9399830938292477e-05, "loss": 0.3436, "step": 142, "task_loss": 1.0769908428192139 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38488003611564636, "epoch": 0.12, "learning_rate": 4.93956043956044e-05, "loss": 0.3408, "step": 143, "task_loss": 1.7284129858016968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2676427662372589, "epoch": 0.12, "learning_rate": 4.9391377852916316e-05, "loss": 0.3083, "step": 144, "task_loss": 1.0192649364471436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21843518316745758, "epoch": 0.12, "learning_rate": 4.9387151310228236e-05, "loss": 0.2872, "step": 145, "task_loss": 0.4018738865852356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6630820035934448, "epoch": 0.12, "learning_rate": 4.9382924767540155e-05, "loss": 0.3742, "step": 146, "task_loss": 1.3242998123168945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28332826495170593, "epoch": 0.12, "learning_rate": 4.937869822485207e-05, "loss": 0.4332, "step": 147, "task_loss": 0.5753346681594849 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25695425271987915, "epoch": 0.13, "learning_rate": 4.9374471682163995e-05, "loss": 0.315, "step": 148, "task_loss": 0.9013621211051941 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25189918279647827, "epoch": 0.13, "learning_rate": 4.9370245139475915e-05, "loss": 0.4817, "step": 149, "task_loss": 0.228720560669899 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4760388135910034, "epoch": 0.13, "learning_rate": 4.936601859678783e-05, "loss": 0.4217, "step": 150, "task_loss": 0.6111045479774475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4705445170402527, "epoch": 0.13, "learning_rate": 4.936179205409975e-05, "loss": 0.5399, "step": 151, "task_loss": 1.0512099266052246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29746076464653015, "epoch": 0.13, "learning_rate": 4.935756551141167e-05, "loss": 0.4229, "step": 152, "task_loss": 0.2905904948711395 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3166247010231018, "epoch": 0.13, "learning_rate": 4.935333896872359e-05, "loss": 0.4384, "step": 153, "task_loss": 0.6401548981666565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5391810536384583, "epoch": 0.13, "learning_rate": 4.934911242603551e-05, "loss": 0.4406, "step": 154, "task_loss": 0.442170649766922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3679506778717041, "epoch": 0.13, "learning_rate": 4.9344885883347426e-05, "loss": 0.4341, "step": 155, "task_loss": 0.7660558223724365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44703105092048645, "epoch": 0.13, "learning_rate": 4.9340659340659346e-05, "loss": 0.3725, "step": 156, "task_loss": 0.31833499670028687 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3736255466938019, "epoch": 0.13, "learning_rate": 4.933643279797126e-05, "loss": 0.3678, "step": 157, "task_loss": 0.9408439993858337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5729640126228333, "epoch": 0.13, "learning_rate": 4.933220625528318e-05, "loss": 0.5446, "step": 158, "task_loss": 0.3189557194709778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23309840261936188, "epoch": 0.13, "learning_rate": 4.93279797125951e-05, "loss": 0.4131, "step": 159, "task_loss": 0.08921865373849869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24005305767059326, "epoch": 0.14, "learning_rate": 4.932375316990702e-05, "loss": 0.2883, "step": 160, "task_loss": 0.696487307548523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.548248827457428, "epoch": 0.14, "learning_rate": 4.931952662721894e-05, "loss": 0.3492, "step": 161, "task_loss": 0.8208967447280884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.09402120858430862, "epoch": 0.14, "learning_rate": 4.931530008453086e-05, "loss": 0.2299, "step": 162, "task_loss": 0.01031099446117878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2072228193283081, "epoch": 0.14, "learning_rate": 4.931107354184277e-05, "loss": 0.2668, "step": 163, "task_loss": 0.5715734958648682 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4195344150066376, "epoch": 0.14, "learning_rate": 4.930684699915469e-05, "loss": 0.3986, "step": 164, "task_loss": 0.8805137872695923 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4367620348930359, "epoch": 0.14, "learning_rate": 4.930262045646662e-05, "loss": 0.3233, "step": 165, "task_loss": 1.0261000394821167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4603279232978821, "epoch": 0.14, "learning_rate": 4.929839391377853e-05, "loss": 0.4211, "step": 166, "task_loss": 0.8634194731712341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4213913381099701, "epoch": 0.14, "learning_rate": 4.929416737109045e-05, "loss": 0.373, "step": 167, "task_loss": 1.4583817720413208 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20449984073638916, "epoch": 0.14, "learning_rate": 4.928994082840237e-05, "loss": 0.4052, "step": 168, "task_loss": 0.14410758018493652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23656447231769562, "epoch": 0.14, "learning_rate": 4.928571428571429e-05, "loss": 0.2618, "step": 169, "task_loss": 0.05356917530298233 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.244521826505661, "epoch": 0.14, "learning_rate": 4.928148774302621e-05, "loss": 0.2705, "step": 170, "task_loss": 0.2933933436870575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30323511362075806, "epoch": 0.14, "learning_rate": 4.927726120033813e-05, "loss": 0.2563, "step": 171, "task_loss": 0.49771153926849365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30565589666366577, "epoch": 0.15, "learning_rate": 4.927303465765005e-05, "loss": 0.2989, "step": 172, "task_loss": 0.282042920589447 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36005306243896484, "epoch": 0.15, "learning_rate": 4.926880811496196e-05, "loss": 0.3026, "step": 173, "task_loss": 0.36329495906829834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30820009112358093, "epoch": 0.15, "learning_rate": 4.926458157227388e-05, "loss": 0.3365, "step": 174, "task_loss": 1.2730638980865479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3730316460132599, "epoch": 0.15, "learning_rate": 4.92603550295858e-05, "loss": 0.3356, "step": 175, "task_loss": 0.489788681268692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2243185043334961, "epoch": 0.15, "learning_rate": 4.925612848689772e-05, "loss": 0.3381, "step": 176, "task_loss": 0.4047553837299347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3008669316768646, "epoch": 0.15, "learning_rate": 4.925190194420964e-05, "loss": 0.2945, "step": 177, "task_loss": 0.466147780418396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19667010009288788, "epoch": 0.15, "learning_rate": 4.924767540152156e-05, "loss": 0.2742, "step": 178, "task_loss": 0.5776610970497131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21384766697883606, "epoch": 0.15, "learning_rate": 4.924344885883347e-05, "loss": 0.375, "step": 179, "task_loss": 0.3693738579750061 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2983366847038269, "epoch": 0.15, "learning_rate": 4.923922231614539e-05, "loss": 0.3567, "step": 180, "task_loss": 0.609382152557373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30045539140701294, "epoch": 0.15, "learning_rate": 4.923499577345731e-05, "loss": 0.3851, "step": 181, "task_loss": 0.38646233081817627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2023361772298813, "epoch": 0.15, "learning_rate": 4.923076923076924e-05, "loss": 0.2859, "step": 182, "task_loss": 0.8480668067932129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24268339574337006, "epoch": 0.15, "learning_rate": 4.922654268808115e-05, "loss": 0.3398, "step": 183, "task_loss": 0.6510738134384155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23291978240013123, "epoch": 0.16, "learning_rate": 4.922231614539307e-05, "loss": 0.3482, "step": 184, "task_loss": 1.3659625053405762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3567195236682892, "epoch": 0.16, "learning_rate": 4.921808960270499e-05, "loss": 0.4689, "step": 185, "task_loss": 0.8014994859695435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.268853098154068, "epoch": 0.16, "learning_rate": 4.9213863060016904e-05, "loss": 0.3964, "step": 186, "task_loss": 0.9685837030410767 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.10869912803173065, "epoch": 0.16, "learning_rate": 4.920963651732883e-05, "loss": 0.2751, "step": 187, "task_loss": 0.37290409207344055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25897130370140076, "epoch": 0.16, "learning_rate": 4.920540997464075e-05, "loss": 0.3332, "step": 188, "task_loss": 0.43131357431411743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35233813524246216, "epoch": 0.16, "learning_rate": 4.9201183431952664e-05, "loss": 0.3468, "step": 189, "task_loss": 1.5360071659088135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18634657561779022, "epoch": 0.16, "learning_rate": 4.919695688926458e-05, "loss": 0.2161, "step": 190, "task_loss": 0.5493410229682922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12940114736557007, "epoch": 0.16, "learning_rate": 4.91927303465765e-05, "loss": 0.2754, "step": 191, "task_loss": 0.028904523700475693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3526163101196289, "epoch": 0.16, "learning_rate": 4.9188503803888416e-05, "loss": 0.3042, "step": 192, "task_loss": 0.7353550791740417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35715556144714355, "epoch": 0.16, "learning_rate": 4.918427726120034e-05, "loss": 0.3503, "step": 193, "task_loss": 0.39157605171203613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38395220041275024, "epoch": 0.16, "learning_rate": 4.918005071851226e-05, "loss": 0.3159, "step": 194, "task_loss": 0.5014969110488892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4627022445201874, "epoch": 0.16, "learning_rate": 4.9175824175824175e-05, "loss": 0.3327, "step": 195, "task_loss": 0.5290047526359558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20345164835453033, "epoch": 0.17, "learning_rate": 4.9171597633136095e-05, "loss": 0.2885, "step": 196, "task_loss": 0.8092126250267029 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6022716760635376, "epoch": 0.17, "learning_rate": 4.9167371090448015e-05, "loss": 0.3501, "step": 197, "task_loss": 0.6652913689613342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4178740382194519, "epoch": 0.17, "learning_rate": 4.9163144547759934e-05, "loss": 0.4446, "step": 198, "task_loss": 0.5839725732803345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24084614217281342, "epoch": 0.17, "learning_rate": 4.9158918005071854e-05, "loss": 0.3208, "step": 199, "task_loss": 0.6247647404670715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5380869507789612, "epoch": 0.17, "learning_rate": 4.9154691462383774e-05, "loss": 0.4439, "step": 200, "task_loss": 1.4319199323654175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28592145442962646, "epoch": 0.17, "learning_rate": 4.9150464919695694e-05, "loss": 0.2889, "step": 201, "task_loss": 0.6194091439247131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26645004749298096, "epoch": 0.17, "learning_rate": 4.914623837700761e-05, "loss": 0.307, "step": 202, "task_loss": 0.6286120414733887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16263580322265625, "epoch": 0.17, "learning_rate": 4.9142011834319526e-05, "loss": 0.2924, "step": 203, "task_loss": 0.24746073782444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21327966451644897, "epoch": 0.17, "learning_rate": 4.913778529163145e-05, "loss": 0.3467, "step": 204, "task_loss": 0.4032347500324249 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5899863243103027, "epoch": 0.17, "learning_rate": 4.9133558748943366e-05, "loss": 0.3852, "step": 205, "task_loss": 0.2813647389411926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15700814127922058, "epoch": 0.17, "learning_rate": 4.9129332206255286e-05, "loss": 0.356, "step": 206, "task_loss": 0.19302156567573547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3096359670162201, "epoch": 0.17, "learning_rate": 4.9125105663567205e-05, "loss": 0.356, "step": 207, "task_loss": 0.8887370824813843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19024589657783508, "epoch": 0.18, "learning_rate": 4.912087912087912e-05, "loss": 0.2885, "step": 208, "task_loss": 0.7476159930229187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6751109957695007, "epoch": 0.18, "learning_rate": 4.911665257819104e-05, "loss": 0.3892, "step": 209, "task_loss": 0.6961216926574707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22509407997131348, "epoch": 0.18, "learning_rate": 4.9112426035502965e-05, "loss": 0.2569, "step": 210, "task_loss": 0.46005940437316895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20882007479667664, "epoch": 0.18, "learning_rate": 4.9108199492814884e-05, "loss": 0.3174, "step": 211, "task_loss": 0.35927194356918335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23338523507118225, "epoch": 0.18, "learning_rate": 4.91039729501268e-05, "loss": 0.2839, "step": 212, "task_loss": 0.5691384077072144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18294541537761688, "epoch": 0.18, "learning_rate": 4.909974640743872e-05, "loss": 0.2884, "step": 213, "task_loss": 0.08627941459417343 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24400398135185242, "epoch": 0.18, "learning_rate": 4.909551986475064e-05, "loss": 0.3955, "step": 214, "task_loss": 1.446928858757019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2911604046821594, "epoch": 0.18, "learning_rate": 4.9091293322062556e-05, "loss": 0.3205, "step": 215, "task_loss": 0.5143615007400513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28537502884864807, "epoch": 0.18, "learning_rate": 4.9087066779374476e-05, "loss": 0.3005, "step": 216, "task_loss": 1.0182530879974365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5088628530502319, "epoch": 0.18, "learning_rate": 4.9082840236686396e-05, "loss": 0.4478, "step": 217, "task_loss": 1.3089011907577515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38104909658432007, "epoch": 0.18, "learning_rate": 4.907861369399831e-05, "loss": 0.3935, "step": 218, "task_loss": 0.4928361177444458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3787882328033447, "epoch": 0.19, "learning_rate": 4.907438715131023e-05, "loss": 0.3754, "step": 219, "task_loss": 0.650814950466156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2550523579120636, "epoch": 0.19, "learning_rate": 4.907016060862215e-05, "loss": 0.3334, "step": 220, "task_loss": 0.5834181904792786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2224728763103485, "epoch": 0.19, "learning_rate": 4.906593406593407e-05, "loss": 0.2642, "step": 221, "task_loss": 0.3143998086452484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23832443356513977, "epoch": 0.19, "learning_rate": 4.906170752324599e-05, "loss": 0.322, "step": 222, "task_loss": 1.4493589401245117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6176878213882446, "epoch": 0.19, "learning_rate": 4.905748098055791e-05, "loss": 0.4383, "step": 223, "task_loss": 0.7622699737548828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2898561656475067, "epoch": 0.19, "learning_rate": 4.905325443786982e-05, "loss": 0.3635, "step": 224, "task_loss": 0.5839920043945312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3696010708808899, "epoch": 0.19, "learning_rate": 4.904902789518174e-05, "loss": 0.3661, "step": 225, "task_loss": 0.7432633638381958 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20091712474822998, "epoch": 0.19, "learning_rate": 4.904480135249366e-05, "loss": 0.2965, "step": 226, "task_loss": 0.32529860734939575 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39854955673217773, "epoch": 0.19, "learning_rate": 4.9040574809805587e-05, "loss": 0.4011, "step": 227, "task_loss": 0.40981027483940125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2788071036338806, "epoch": 0.19, "learning_rate": 4.90363482671175e-05, "loss": 0.4525, "step": 228, "task_loss": 1.2623895406723022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46013692021369934, "epoch": 0.19, "learning_rate": 4.903212172442942e-05, "loss": 0.5073, "step": 229, "task_loss": 1.6259897947311401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3463882505893707, "epoch": 0.19, "learning_rate": 4.902789518174134e-05, "loss": 0.2795, "step": 230, "task_loss": 0.9291456937789917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29880496859550476, "epoch": 0.2, "learning_rate": 4.902366863905325e-05, "loss": 0.3014, "step": 231, "task_loss": 0.7562724351882935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24067357182502747, "epoch": 0.2, "learning_rate": 4.901944209636518e-05, "loss": 0.3351, "step": 232, "task_loss": 1.0021733045578003 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.319251149892807, "epoch": 0.2, "learning_rate": 4.90152155536771e-05, "loss": 0.2629, "step": 233, "task_loss": 0.45998692512512207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24535714089870453, "epoch": 0.2, "learning_rate": 4.901098901098901e-05, "loss": 0.4441, "step": 234, "task_loss": 0.20303231477737427 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2730843424797058, "epoch": 0.2, "learning_rate": 4.900676246830093e-05, "loss": 0.3293, "step": 235, "task_loss": 0.5525544881820679 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19751249253749847, "epoch": 0.2, "learning_rate": 4.900253592561285e-05, "loss": 0.2836, "step": 236, "task_loss": 0.4372982382774353 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30453741550445557, "epoch": 0.2, "learning_rate": 4.899830938292477e-05, "loss": 0.3706, "step": 237, "task_loss": 1.265528917312622 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1653909683227539, "epoch": 0.2, "learning_rate": 4.899408284023669e-05, "loss": 0.4835, "step": 238, "task_loss": 1.224724292755127 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3479859530925751, "epoch": 0.2, "learning_rate": 4.898985629754861e-05, "loss": 0.3989, "step": 239, "task_loss": 0.655051052570343 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20435407757759094, "epoch": 0.2, "learning_rate": 4.898562975486053e-05, "loss": 0.2831, "step": 240, "task_loss": 0.28512734174728394 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4552597403526306, "epoch": 0.2, "learning_rate": 4.898140321217244e-05, "loss": 0.3285, "step": 241, "task_loss": 0.4700802266597748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24903175234794617, "epoch": 0.2, "learning_rate": 4.897717666948436e-05, "loss": 0.3497, "step": 242, "task_loss": 0.401705801486969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33746951818466187, "epoch": 0.21, "learning_rate": 4.897295012679628e-05, "loss": 0.3429, "step": 243, "task_loss": 0.26708200573921204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3835751712322235, "epoch": 0.21, "learning_rate": 4.89687235841082e-05, "loss": 0.2924, "step": 244, "task_loss": 0.7240639925003052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3886156678199768, "epoch": 0.21, "learning_rate": 4.896449704142012e-05, "loss": 0.3834, "step": 245, "task_loss": 0.26518186926841736 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30780693888664246, "epoch": 0.21, "learning_rate": 4.896027049873204e-05, "loss": 0.3604, "step": 246, "task_loss": 0.5353126525878906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2051931768655777, "epoch": 0.21, "learning_rate": 4.8956043956043954e-05, "loss": 0.3104, "step": 247, "task_loss": 0.45676663517951965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25273922085762024, "epoch": 0.21, "learning_rate": 4.8951817413355874e-05, "loss": 0.3145, "step": 248, "task_loss": 0.20090925693511963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5339232683181763, "epoch": 0.21, "learning_rate": 4.89475908706678e-05, "loss": 0.4151, "step": 249, "task_loss": 0.4397232234477997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49212467670440674, "epoch": 0.21, "learning_rate": 4.8943364327979713e-05, "loss": 0.3511, "step": 250, "task_loss": 0.25163865089416504 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5351613163948059, "epoch": 0.21, "learning_rate": 4.893913778529163e-05, "loss": 0.3722, "step": 251, "task_loss": 1.080898642539978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2241917848587036, "epoch": 0.21, "learning_rate": 4.893491124260355e-05, "loss": 0.3855, "step": 252, "task_loss": 0.7277026176452637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23997260630130768, "epoch": 0.21, "learning_rate": 4.8930684699915466e-05, "loss": 0.4527, "step": 253, "task_loss": 0.6579324007034302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41035687923431396, "epoch": 0.21, "learning_rate": 4.892645815722739e-05, "loss": 0.2967, "step": 254, "task_loss": 1.1071937084197998 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4491110146045685, "epoch": 0.22, "learning_rate": 4.892223161453931e-05, "loss": 0.3185, "step": 255, "task_loss": 0.6341534852981567 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31548792123794556, "epoch": 0.22, "learning_rate": 4.891800507185123e-05, "loss": 0.3219, "step": 256, "task_loss": 0.6876879930496216 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3046998679637909, "epoch": 0.22, "learning_rate": 4.8913778529163145e-05, "loss": 0.3781, "step": 257, "task_loss": 0.5512405633926392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35629910230636597, "epoch": 0.22, "learning_rate": 4.8909551986475065e-05, "loss": 0.4538, "step": 258, "task_loss": 0.9044925570487976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47341591119766235, "epoch": 0.22, "learning_rate": 4.8905325443786984e-05, "loss": 0.3629, "step": 259, "task_loss": 1.0995041131973267 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16987548768520355, "epoch": 0.22, "learning_rate": 4.8901098901098904e-05, "loss": 0.2282, "step": 260, "task_loss": 0.08861540257930756 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3050326704978943, "epoch": 0.22, "learning_rate": 4.8896872358410824e-05, "loss": 0.3364, "step": 261, "task_loss": 1.1310858726501465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16695277392864227, "epoch": 0.22, "learning_rate": 4.8892645815722744e-05, "loss": 0.3632, "step": 262, "task_loss": 0.4487459659576416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46074435114860535, "epoch": 0.22, "learning_rate": 4.8888419273034656e-05, "loss": 0.3622, "step": 263, "task_loss": 0.5444375872612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1946798712015152, "epoch": 0.22, "learning_rate": 4.8884192730346576e-05, "loss": 0.253, "step": 264, "task_loss": 0.38776695728302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2863951325416565, "epoch": 0.22, "learning_rate": 4.8879966187658496e-05, "loss": 0.3384, "step": 265, "task_loss": 0.5527970194816589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20850254595279694, "epoch": 0.22, "learning_rate": 4.8875739644970416e-05, "loss": 0.3121, "step": 266, "task_loss": 0.21559667587280273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3922833800315857, "epoch": 0.23, "learning_rate": 4.8871513102282335e-05, "loss": 0.3714, "step": 267, "task_loss": 1.0098217725753784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33430397510528564, "epoch": 0.23, "learning_rate": 4.8867286559594255e-05, "loss": 0.3824, "step": 268, "task_loss": 0.7453335523605347 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7871788740158081, "epoch": 0.23, "learning_rate": 4.8863060016906175e-05, "loss": 0.5347, "step": 269, "task_loss": 0.6393784284591675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38466086983680725, "epoch": 0.23, "learning_rate": 4.885883347421809e-05, "loss": 0.2949, "step": 270, "task_loss": 0.32888132333755493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3655931055545807, "epoch": 0.23, "learning_rate": 4.8854606931530014e-05, "loss": 0.3399, "step": 271, "task_loss": 1.2650548219680786 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.199500173330307, "epoch": 0.23, "learning_rate": 4.8850380388841934e-05, "loss": 0.4179, "step": 272, "task_loss": 0.200534850358963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20676256716251373, "epoch": 0.23, "learning_rate": 4.884615384615385e-05, "loss": 0.3105, "step": 273, "task_loss": 0.13629695773124695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35268110036849976, "epoch": 0.23, "learning_rate": 4.884192730346577e-05, "loss": 0.3439, "step": 274, "task_loss": 0.3449114263057709 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19986888766288757, "epoch": 0.23, "learning_rate": 4.8837700760777687e-05, "loss": 0.2483, "step": 275, "task_loss": 0.08239832520484924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36058855056762695, "epoch": 0.23, "learning_rate": 4.8833474218089606e-05, "loss": 0.3632, "step": 276, "task_loss": 0.344508558511734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17777377367019653, "epoch": 0.23, "learning_rate": 4.8829247675401526e-05, "loss": 0.2685, "step": 277, "task_loss": 0.16075986623764038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31643208861351013, "epoch": 0.23, "learning_rate": 4.8825021132713446e-05, "loss": 0.263, "step": 278, "task_loss": 0.30614912509918213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24418959021568298, "epoch": 0.24, "learning_rate": 4.882079459002536e-05, "loss": 0.3307, "step": 279, "task_loss": 0.44672325253486633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3086036145687103, "epoch": 0.24, "learning_rate": 4.881656804733728e-05, "loss": 0.361, "step": 280, "task_loss": 0.5483068227767944 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2502906322479248, "epoch": 0.24, "learning_rate": 4.88123415046492e-05, "loss": 0.3194, "step": 281, "task_loss": 0.7591536045074463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5221872329711914, "epoch": 0.24, "learning_rate": 4.880811496196112e-05, "loss": 0.3584, "step": 282, "task_loss": 0.8819484114646912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32768136262893677, "epoch": 0.24, "learning_rate": 4.880388841927304e-05, "loss": 0.4272, "step": 283, "task_loss": 0.9988192915916443 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3342861235141754, "epoch": 0.24, "learning_rate": 4.879966187658496e-05, "loss": 0.3513, "step": 284, "task_loss": 0.6095466613769531 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29538947343826294, "epoch": 0.24, "learning_rate": 4.879543533389688e-05, "loss": 0.2872, "step": 285, "task_loss": 0.6634162664413452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17239470779895782, "epoch": 0.24, "learning_rate": 4.879120879120879e-05, "loss": 0.3073, "step": 286, "task_loss": 0.2715796232223511 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34022772312164307, "epoch": 0.24, "learning_rate": 4.878698224852071e-05, "loss": 0.3572, "step": 287, "task_loss": 0.46570080518722534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.586940348148346, "epoch": 0.24, "learning_rate": 4.8782755705832636e-05, "loss": 0.4532, "step": 288, "task_loss": 0.28270670771598816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3450503945350647, "epoch": 0.24, "learning_rate": 4.877852916314455e-05, "loss": 0.3776, "step": 289, "task_loss": 1.6055642366409302 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23230475187301636, "epoch": 0.24, "learning_rate": 4.877430262045647e-05, "loss": 0.3155, "step": 290, "task_loss": 0.7673444151878357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5032759308815002, "epoch": 0.25, "learning_rate": 4.877007607776839e-05, "loss": 0.4087, "step": 291, "task_loss": 0.4153584837913513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20205602049827576, "epoch": 0.25, "learning_rate": 4.87658495350803e-05, "loss": 0.279, "step": 292, "task_loss": 0.2720683217048645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3824338912963867, "epoch": 0.25, "learning_rate": 4.876162299239223e-05, "loss": 0.3928, "step": 293, "task_loss": 0.43445995450019836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39851096272468567, "epoch": 0.25, "learning_rate": 4.875739644970415e-05, "loss": 0.383, "step": 294, "task_loss": 0.47297611832618713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23739580810070038, "epoch": 0.25, "learning_rate": 4.875316990701606e-05, "loss": 0.288, "step": 295, "task_loss": 0.28086382150650024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3505229949951172, "epoch": 0.25, "learning_rate": 4.874894336432798e-05, "loss": 0.3839, "step": 296, "task_loss": 0.9735152125358582 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31758180260658264, "epoch": 0.25, "learning_rate": 4.87447168216399e-05, "loss": 0.3954, "step": 297, "task_loss": 0.746353805065155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5545935034751892, "epoch": 0.25, "learning_rate": 4.874049027895182e-05, "loss": 0.4143, "step": 298, "task_loss": 0.7208579182624817 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25441351532936096, "epoch": 0.25, "learning_rate": 4.873626373626374e-05, "loss": 0.3798, "step": 299, "task_loss": 0.39883047342300415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4335569739341736, "epoch": 0.25, "learning_rate": 4.873203719357566e-05, "loss": 0.4675, "step": 300, "task_loss": 0.5771480798721313 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.200557142496109, "epoch": 0.25, "learning_rate": 4.872781065088758e-05, "loss": 0.3159, "step": 301, "task_loss": 0.08480185270309448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2761476933956146, "epoch": 0.26, "learning_rate": 4.872358410819949e-05, "loss": 0.3051, "step": 302, "task_loss": 0.6513972282409668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2249608039855957, "epoch": 0.26, "learning_rate": 4.871935756551141e-05, "loss": 0.3888, "step": 303, "task_loss": 0.5342172980308533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4718804359436035, "epoch": 0.26, "learning_rate": 4.871513102282333e-05, "loss": 0.3778, "step": 304, "task_loss": 1.6125479936599731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7248421311378479, "epoch": 0.26, "learning_rate": 4.871090448013525e-05, "loss": 0.4007, "step": 305, "task_loss": 0.9928491711616516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23324015736579895, "epoch": 0.26, "learning_rate": 4.870667793744717e-05, "loss": 0.3242, "step": 306, "task_loss": 0.18504105508327484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33958256244659424, "epoch": 0.26, "learning_rate": 4.870245139475909e-05, "loss": 0.3021, "step": 307, "task_loss": 0.9580204486846924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3680446147918701, "epoch": 0.26, "learning_rate": 4.8698224852071004e-05, "loss": 0.3885, "step": 308, "task_loss": 1.010891079902649 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4460088610649109, "epoch": 0.26, "learning_rate": 4.8693998309382924e-05, "loss": 0.3097, "step": 309, "task_loss": 0.44874387979507446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2667539715766907, "epoch": 0.26, "learning_rate": 4.868977176669485e-05, "loss": 0.4331, "step": 310, "task_loss": 1.2887728214263916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1914120465517044, "epoch": 0.26, "learning_rate": 4.868554522400676e-05, "loss": 0.4491, "step": 311, "task_loss": 0.14170601963996887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23233111202716827, "epoch": 0.26, "learning_rate": 4.868131868131868e-05, "loss": 0.2712, "step": 312, "task_loss": 0.27271774411201477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27626264095306396, "epoch": 0.26, "learning_rate": 4.86770921386306e-05, "loss": 0.2319, "step": 313, "task_loss": 0.6159605979919434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.231930673122406, "epoch": 0.27, "learning_rate": 4.867286559594252e-05, "loss": 0.3125, "step": 314, "task_loss": 0.47959962487220764 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35934150218963623, "epoch": 0.27, "learning_rate": 4.866863905325444e-05, "loss": 0.3128, "step": 315, "task_loss": 0.6822903156280518 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2281390279531479, "epoch": 0.27, "learning_rate": 4.866441251056636e-05, "loss": 0.3146, "step": 316, "task_loss": 0.1894877403974533 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4440813660621643, "epoch": 0.27, "learning_rate": 4.866018596787828e-05, "loss": 0.4482, "step": 317, "task_loss": 1.048689365386963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1617339700460434, "epoch": 0.27, "learning_rate": 4.8655959425190195e-05, "loss": 0.303, "step": 318, "task_loss": 0.5686681270599365 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38979366421699524, "epoch": 0.27, "learning_rate": 4.8651732882502114e-05, "loss": 0.3538, "step": 319, "task_loss": 0.605825662612915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3023730218410492, "epoch": 0.27, "learning_rate": 4.8647506339814034e-05, "loss": 0.2528, "step": 320, "task_loss": 0.5307630300521851 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3410377502441406, "epoch": 0.27, "learning_rate": 4.8643279797125954e-05, "loss": 0.4559, "step": 321, "task_loss": 1.0144431591033936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29597872495651245, "epoch": 0.27, "learning_rate": 4.8639053254437874e-05, "loss": 0.3011, "step": 322, "task_loss": 0.5639635324478149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5708921551704407, "epoch": 0.27, "learning_rate": 4.863482671174979e-05, "loss": 0.3387, "step": 323, "task_loss": 1.2709810733795166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3579939007759094, "epoch": 0.27, "learning_rate": 4.8630600169061706e-05, "loss": 0.3968, "step": 324, "task_loss": 0.6427634358406067 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31119680404663086, "epoch": 0.27, "learning_rate": 4.8626373626373626e-05, "loss": 0.4006, "step": 325, "task_loss": 0.3410301208496094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3839254677295685, "epoch": 0.28, "learning_rate": 4.8622147083685546e-05, "loss": 0.2692, "step": 326, "task_loss": 0.5920546650886536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31953269243240356, "epoch": 0.28, "learning_rate": 4.861792054099747e-05, "loss": 0.2991, "step": 327, "task_loss": 0.9777121543884277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19304096698760986, "epoch": 0.28, "learning_rate": 4.8613693998309385e-05, "loss": 0.3123, "step": 328, "task_loss": 0.5040408968925476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2374301254749298, "epoch": 0.28, "learning_rate": 4.8609467455621305e-05, "loss": 0.4676, "step": 329, "task_loss": 0.4793791174888611 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49753913283348083, "epoch": 0.28, "learning_rate": 4.8605240912933225e-05, "loss": 0.4019, "step": 330, "task_loss": 0.3474293649196625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4127744436264038, "epoch": 0.28, "learning_rate": 4.860101437024514e-05, "loss": 0.6179, "step": 331, "task_loss": 0.3667612075805664 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.508611798286438, "epoch": 0.28, "learning_rate": 4.8596787827557064e-05, "loss": 0.3667, "step": 332, "task_loss": 0.5460253357887268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3820558786392212, "epoch": 0.28, "learning_rate": 4.8592561284868984e-05, "loss": 0.3105, "step": 333, "task_loss": 0.3513747751712799 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26280924677848816, "epoch": 0.28, "learning_rate": 4.85883347421809e-05, "loss": 0.3708, "step": 334, "task_loss": 0.702881932258606 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3734799027442932, "epoch": 0.28, "learning_rate": 4.858410819949282e-05, "loss": 0.2676, "step": 335, "task_loss": 0.6505433917045593 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19496524333953857, "epoch": 0.28, "learning_rate": 4.8579881656804736e-05, "loss": 0.2688, "step": 336, "task_loss": 0.0567777082324028 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30779075622558594, "epoch": 0.28, "learning_rate": 4.857565511411665e-05, "loss": 0.3478, "step": 337, "task_loss": 0.9644184112548828 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3214837312698364, "epoch": 0.29, "learning_rate": 4.8571428571428576e-05, "loss": 0.381, "step": 338, "task_loss": 0.8313199877738953 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40075811743736267, "epoch": 0.29, "learning_rate": 4.8567202028740496e-05, "loss": 0.3495, "step": 339, "task_loss": 0.6049473881721497 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23903967440128326, "epoch": 0.29, "learning_rate": 4.856297548605241e-05, "loss": 0.2891, "step": 340, "task_loss": 0.9305899739265442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2694218158721924, "epoch": 0.29, "learning_rate": 4.855874894336433e-05, "loss": 0.3745, "step": 341, "task_loss": 0.45736974477767944 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23542985320091248, "epoch": 0.29, "learning_rate": 4.855452240067625e-05, "loss": 0.381, "step": 342, "task_loss": 0.1643262803554535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30288824439048767, "epoch": 0.29, "learning_rate": 4.855029585798817e-05, "loss": 0.3187, "step": 343, "task_loss": 0.4931211769580841 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4340369403362274, "epoch": 0.29, "learning_rate": 4.854606931530009e-05, "loss": 0.4145, "step": 344, "task_loss": 0.9261170029640198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2673012316226959, "epoch": 0.29, "learning_rate": 4.854184277261201e-05, "loss": 0.307, "step": 345, "task_loss": 0.9187374114990234 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25153711438179016, "epoch": 0.29, "learning_rate": 4.853761622992393e-05, "loss": 0.2396, "step": 346, "task_loss": 0.3297625482082367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48461300134658813, "epoch": 0.29, "learning_rate": 4.853338968723584e-05, "loss": 0.3127, "step": 347, "task_loss": 0.8848038911819458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32733801007270813, "epoch": 0.29, "learning_rate": 4.852916314454776e-05, "loss": 0.3665, "step": 348, "task_loss": 0.5857535004615784 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39943447709083557, "epoch": 0.29, "learning_rate": 4.8524936601859686e-05, "loss": 0.4085, "step": 349, "task_loss": 0.06979019939899445 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26431018114089966, "epoch": 0.3, "learning_rate": 4.85207100591716e-05, "loss": 0.4182, "step": 350, "task_loss": 0.16538062691688538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21030639111995697, "epoch": 0.3, "learning_rate": 4.851648351648352e-05, "loss": 0.3238, "step": 351, "task_loss": 0.5685637593269348 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2704937160015106, "epoch": 0.3, "learning_rate": 4.851225697379544e-05, "loss": 0.3425, "step": 352, "task_loss": 0.9471548199653625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3419402241706848, "epoch": 0.3, "learning_rate": 4.850803043110735e-05, "loss": 0.3797, "step": 353, "task_loss": 0.4429088532924652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34285545349121094, "epoch": 0.3, "learning_rate": 4.850380388841927e-05, "loss": 0.4639, "step": 354, "task_loss": 0.16208268702030182 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17453980445861816, "epoch": 0.3, "learning_rate": 4.84995773457312e-05, "loss": 0.3265, "step": 355, "task_loss": 0.1706947535276413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12912121415138245, "epoch": 0.3, "learning_rate": 4.849535080304311e-05, "loss": 0.3914, "step": 356, "task_loss": 0.8993921875953674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2798566222190857, "epoch": 0.3, "learning_rate": 4.849112426035503e-05, "loss": 0.2617, "step": 357, "task_loss": 0.2404172122478485 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38516366481781006, "epoch": 0.3, "learning_rate": 4.848689771766695e-05, "loss": 0.3581, "step": 358, "task_loss": 0.7003974318504333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3995153307914734, "epoch": 0.3, "learning_rate": 4.848267117497887e-05, "loss": 0.3755, "step": 359, "task_loss": 0.6110330820083618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5429834723472595, "epoch": 0.3, "learning_rate": 4.847844463229079e-05, "loss": 0.3508, "step": 360, "task_loss": 0.4747626781463623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5203607082366943, "epoch": 0.3, "learning_rate": 4.847421808960271e-05, "loss": 0.4817, "step": 361, "task_loss": 0.9935450553894043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5610111951828003, "epoch": 0.31, "learning_rate": 4.846999154691463e-05, "loss": 0.3601, "step": 362, "task_loss": 0.6307058334350586 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26061660051345825, "epoch": 0.31, "learning_rate": 4.846576500422654e-05, "loss": 0.4462, "step": 363, "task_loss": 0.7939696907997131 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2086484432220459, "epoch": 0.31, "learning_rate": 4.846153846153846e-05, "loss": 0.4501, "step": 364, "task_loss": 1.2991604804992676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2818431258201599, "epoch": 0.31, "learning_rate": 4.845731191885038e-05, "loss": 0.2959, "step": 365, "task_loss": 0.15905030071735382 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2016371339559555, "epoch": 0.31, "learning_rate": 4.84530853761623e-05, "loss": 0.3659, "step": 366, "task_loss": 0.34598076343536377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32798513770103455, "epoch": 0.31, "learning_rate": 4.844885883347422e-05, "loss": 0.3912, "step": 367, "task_loss": 0.8429501056671143 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41197237372398376, "epoch": 0.31, "learning_rate": 4.844463229078614e-05, "loss": 0.4, "step": 368, "task_loss": 0.6444922685623169 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26251524686813354, "epoch": 0.31, "learning_rate": 4.8440405748098054e-05, "loss": 0.3193, "step": 369, "task_loss": 0.41190579533576965 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6477534770965576, "epoch": 0.31, "learning_rate": 4.8436179205409974e-05, "loss": 0.5108, "step": 370, "task_loss": 0.6784238815307617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.268083393573761, "epoch": 0.31, "learning_rate": 4.843195266272189e-05, "loss": 0.3435, "step": 371, "task_loss": 0.0465749129652977 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17515163123607635, "epoch": 0.31, "learning_rate": 4.842772612003382e-05, "loss": 0.318, "step": 372, "task_loss": 0.5904419422149658 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21761217713356018, "epoch": 0.32, "learning_rate": 4.842349957734573e-05, "loss": 0.2948, "step": 373, "task_loss": 0.47346076369285583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14544571936130524, "epoch": 0.32, "learning_rate": 4.841927303465765e-05, "loss": 0.3477, "step": 374, "task_loss": 0.17858652770519257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2630438506603241, "epoch": 0.32, "learning_rate": 4.841504649196957e-05, "loss": 0.3459, "step": 375, "task_loss": 0.3764711916446686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4630126953125, "epoch": 0.32, "learning_rate": 4.8410819949281485e-05, "loss": 0.3812, "step": 376, "task_loss": 0.3913819491863251 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21063268184661865, "epoch": 0.32, "learning_rate": 4.840659340659341e-05, "loss": 0.3403, "step": 377, "task_loss": 0.29857826232910156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4019916355609894, "epoch": 0.32, "learning_rate": 4.840236686390533e-05, "loss": 0.3179, "step": 378, "task_loss": 0.383775532245636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4334412217140198, "epoch": 0.32, "learning_rate": 4.8398140321217245e-05, "loss": 0.2813, "step": 379, "task_loss": 0.09898050129413605 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27811628580093384, "epoch": 0.32, "learning_rate": 4.8393913778529164e-05, "loss": 0.3137, "step": 380, "task_loss": 0.40869438648223877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5298483371734619, "epoch": 0.32, "learning_rate": 4.8389687235841084e-05, "loss": 0.395, "step": 381, "task_loss": 0.770438015460968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2107640653848648, "epoch": 0.32, "learning_rate": 4.8385460693153004e-05, "loss": 0.336, "step": 382, "task_loss": 0.475759357213974 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4724220335483551, "epoch": 0.32, "learning_rate": 4.8381234150464923e-05, "loss": 0.3241, "step": 383, "task_loss": 1.1706597805023193 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33152490854263306, "epoch": 0.32, "learning_rate": 4.837700760777684e-05, "loss": 0.3438, "step": 384, "task_loss": 0.24386684596538544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4147869646549225, "epoch": 0.33, "learning_rate": 4.8372781065088756e-05, "loss": 0.418, "step": 385, "task_loss": 0.7812007665634155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20956742763519287, "epoch": 0.33, "learning_rate": 4.8368554522400676e-05, "loss": 0.3443, "step": 386, "task_loss": 0.2561536133289337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4003746509552002, "epoch": 0.33, "learning_rate": 4.8364327979712596e-05, "loss": 0.4963, "step": 387, "task_loss": 0.5156252980232239 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.482105016708374, "epoch": 0.33, "learning_rate": 4.8360101437024515e-05, "loss": 0.4836, "step": 388, "task_loss": 0.6523312330245972 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47945621609687805, "epoch": 0.33, "learning_rate": 4.8355874894336435e-05, "loss": 0.3995, "step": 389, "task_loss": 0.8960033655166626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3446442186832428, "epoch": 0.33, "learning_rate": 4.8351648351648355e-05, "loss": 0.4004, "step": 390, "task_loss": 0.7979370951652527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45113879442214966, "epoch": 0.33, "learning_rate": 4.8347421808960275e-05, "loss": 0.2624, "step": 391, "task_loss": 0.980656623840332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39476102590560913, "epoch": 0.33, "learning_rate": 4.834319526627219e-05, "loss": 0.5158, "step": 392, "task_loss": 0.29816144704818726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25753751397132874, "epoch": 0.33, "learning_rate": 4.833896872358411e-05, "loss": 0.3639, "step": 393, "task_loss": 1.2662850618362427 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.302455335855484, "epoch": 0.33, "learning_rate": 4.8334742180896034e-05, "loss": 0.3695, "step": 394, "task_loss": 0.6532285809516907 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19427794218063354, "epoch": 0.33, "learning_rate": 4.833051563820795e-05, "loss": 0.2602, "step": 395, "task_loss": 0.607721209526062 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28633415699005127, "epoch": 0.33, "learning_rate": 4.8326289095519867e-05, "loss": 0.3319, "step": 396, "task_loss": 0.4353885352611542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25589054822921753, "epoch": 0.34, "learning_rate": 4.8322062552831786e-05, "loss": 0.2729, "step": 397, "task_loss": 0.45632249116897583 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5806112885475159, "epoch": 0.34, "learning_rate": 4.83178360101437e-05, "loss": 0.4, "step": 398, "task_loss": 0.6158548593521118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21340397000312805, "epoch": 0.34, "learning_rate": 4.8313609467455626e-05, "loss": 0.3041, "step": 399, "task_loss": 0.5302818417549133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4214976131916046, "epoch": 0.34, "learning_rate": 4.8309382924767545e-05, "loss": 0.3126, "step": 400, "task_loss": 1.0148353576660156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5677328705787659, "epoch": 0.34, "learning_rate": 4.8305156382079465e-05, "loss": 0.3985, "step": 401, "task_loss": 1.1190402507781982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43479910492897034, "epoch": 0.34, "learning_rate": 4.830092983939138e-05, "loss": 0.2808, "step": 402, "task_loss": 0.7361778020858765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3868566155433655, "epoch": 0.34, "learning_rate": 4.82967032967033e-05, "loss": 0.3044, "step": 403, "task_loss": 0.21431250870227814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3633410930633545, "epoch": 0.34, "learning_rate": 4.829247675401522e-05, "loss": 0.4023, "step": 404, "task_loss": 1.0183416604995728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31340885162353516, "epoch": 0.34, "learning_rate": 4.828825021132714e-05, "loss": 0.3294, "step": 405, "task_loss": 0.3995034694671631 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22247859835624695, "epoch": 0.34, "learning_rate": 4.828402366863906e-05, "loss": 0.3844, "step": 406, "task_loss": 0.4827563464641571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3401729464530945, "epoch": 0.34, "learning_rate": 4.827979712595098e-05, "loss": 0.409, "step": 407, "task_loss": 0.37865233421325684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23676316440105438, "epoch": 0.34, "learning_rate": 4.827557058326289e-05, "loss": 0.2726, "step": 408, "task_loss": 0.5899599194526672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47724971175193787, "epoch": 0.35, "learning_rate": 4.827134404057481e-05, "loss": 0.4291, "step": 409, "task_loss": 0.34716418385505676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20625603199005127, "epoch": 0.35, "learning_rate": 4.826711749788673e-05, "loss": 0.2917, "step": 410, "task_loss": 0.10252900421619415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5770359039306641, "epoch": 0.35, "learning_rate": 4.826289095519865e-05, "loss": 0.3665, "step": 411, "task_loss": 0.7841318845748901 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12576042115688324, "epoch": 0.35, "learning_rate": 4.825866441251057e-05, "loss": 0.3062, "step": 412, "task_loss": 0.005407290533185005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16354574263095856, "epoch": 0.35, "learning_rate": 4.825443786982249e-05, "loss": 0.2629, "step": 413, "task_loss": 0.6066843867301941 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18714019656181335, "epoch": 0.35, "learning_rate": 4.82502113271344e-05, "loss": 0.4209, "step": 414, "task_loss": 1.005037784576416 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7463491559028625, "epoch": 0.35, "learning_rate": 4.824598478444632e-05, "loss": 0.4831, "step": 415, "task_loss": 0.5606073141098022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35003143548965454, "epoch": 0.35, "learning_rate": 4.824175824175825e-05, "loss": 0.3597, "step": 416, "task_loss": 0.030992213636636734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35349225997924805, "epoch": 0.35, "learning_rate": 4.823753169907017e-05, "loss": 0.4748, "step": 417, "task_loss": 0.12775585055351257 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3403266966342926, "epoch": 0.35, "learning_rate": 4.823330515638208e-05, "loss": 0.3222, "step": 418, "task_loss": 0.5558363199234009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3834650218486786, "epoch": 0.35, "learning_rate": 4.8229078613694e-05, "loss": 0.3528, "step": 419, "task_loss": 0.7809251546859741 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24055622518062592, "epoch": 0.35, "learning_rate": 4.822485207100592e-05, "loss": 0.416, "step": 420, "task_loss": 0.8119294047355652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34761524200439453, "epoch": 0.36, "learning_rate": 4.822062552831784e-05, "loss": 0.3734, "step": 421, "task_loss": 0.5272291302680969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17843936383724213, "epoch": 0.36, "learning_rate": 4.821639898562976e-05, "loss": 0.2783, "step": 422, "task_loss": 0.09369053691625595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14937134087085724, "epoch": 0.36, "learning_rate": 4.821217244294168e-05, "loss": 0.2942, "step": 423, "task_loss": 0.5208826065063477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3691389262676239, "epoch": 0.36, "learning_rate": 4.820794590025359e-05, "loss": 0.339, "step": 424, "task_loss": 0.6015507578849792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12653256952762604, "epoch": 0.36, "learning_rate": 4.820371935756551e-05, "loss": 0.2732, "step": 425, "task_loss": 0.09884674102067947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3428172171115875, "epoch": 0.36, "learning_rate": 4.819949281487743e-05, "loss": 0.4267, "step": 426, "task_loss": 1.3080663681030273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3141721785068512, "epoch": 0.36, "learning_rate": 4.819526627218935e-05, "loss": 0.3324, "step": 427, "task_loss": 2.044431447982788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24937307834625244, "epoch": 0.36, "learning_rate": 4.819103972950127e-05, "loss": 0.4084, "step": 428, "task_loss": 1.371000051498413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24519547820091248, "epoch": 0.36, "learning_rate": 4.818681318681319e-05, "loss": 0.4358, "step": 429, "task_loss": 0.9066499471664429 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20103178918361664, "epoch": 0.36, "learning_rate": 4.818258664412511e-05, "loss": 0.3936, "step": 430, "task_loss": 1.0921999216079712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37281349301338196, "epoch": 0.36, "learning_rate": 4.8178360101437023e-05, "loss": 0.4536, "step": 431, "task_loss": 0.82737135887146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2910953760147095, "epoch": 0.36, "learning_rate": 4.817413355874894e-05, "loss": 0.3462, "step": 432, "task_loss": 0.5337796807289124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2561354637145996, "epoch": 0.37, "learning_rate": 4.816990701606087e-05, "loss": 0.2544, "step": 433, "task_loss": 0.3379628658294678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2789451777935028, "epoch": 0.37, "learning_rate": 4.816568047337278e-05, "loss": 0.3645, "step": 434, "task_loss": 0.8729575872421265 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.228978231549263, "epoch": 0.37, "learning_rate": 4.81614539306847e-05, "loss": 0.2767, "step": 435, "task_loss": 0.10930343717336655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3351702094078064, "epoch": 0.37, "learning_rate": 4.815722738799662e-05, "loss": 0.3671, "step": 436, "task_loss": 0.45560285449028015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23894400894641876, "epoch": 0.37, "learning_rate": 4.8153000845308535e-05, "loss": 0.3189, "step": 437, "task_loss": 1.1994609832763672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4025985598564148, "epoch": 0.37, "learning_rate": 4.814877430262046e-05, "loss": 0.4706, "step": 438, "task_loss": 1.3986836671829224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28973841667175293, "epoch": 0.37, "learning_rate": 4.814454775993238e-05, "loss": 0.3394, "step": 439, "task_loss": 0.7910912036895752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18264514207839966, "epoch": 0.37, "learning_rate": 4.8140321217244294e-05, "loss": 0.3787, "step": 440, "task_loss": 0.49460968375205994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2948886752128601, "epoch": 0.37, "learning_rate": 4.8136094674556214e-05, "loss": 0.3763, "step": 441, "task_loss": 0.28919515013694763 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25420182943344116, "epoch": 0.37, "learning_rate": 4.8131868131868134e-05, "loss": 0.2895, "step": 442, "task_loss": 0.8527222871780396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40356898307800293, "epoch": 0.37, "learning_rate": 4.8127641589180054e-05, "loss": 0.4178, "step": 443, "task_loss": 0.5866603851318359 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.392120361328125, "epoch": 0.38, "learning_rate": 4.812341504649197e-05, "loss": 0.4148, "step": 444, "task_loss": 0.5381209254264832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29924556612968445, "epoch": 0.38, "learning_rate": 4.811918850380389e-05, "loss": 0.3712, "step": 445, "task_loss": 0.7404133081436157 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1895020306110382, "epoch": 0.38, "learning_rate": 4.811496196111581e-05, "loss": 0.2502, "step": 446, "task_loss": 0.27927011251449585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2506890296936035, "epoch": 0.38, "learning_rate": 4.8110735418427726e-05, "loss": 0.3402, "step": 447, "task_loss": 1.0304343700408936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28709763288497925, "epoch": 0.38, "learning_rate": 4.8106508875739645e-05, "loss": 0.5573, "step": 448, "task_loss": 0.9209693670272827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39132872223854065, "epoch": 0.38, "learning_rate": 4.8102282333051565e-05, "loss": 0.3815, "step": 449, "task_loss": 1.4431852102279663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22359013557434082, "epoch": 0.38, "learning_rate": 4.8098055790363485e-05, "loss": 0.389, "step": 450, "task_loss": 0.2309865951538086 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3252839148044586, "epoch": 0.38, "learning_rate": 4.8093829247675405e-05, "loss": 0.2982, "step": 451, "task_loss": 0.3696426451206207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23491056263446808, "epoch": 0.38, "learning_rate": 4.8089602704987324e-05, "loss": 0.2321, "step": 452, "task_loss": 0.05885794386267662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27181416749954224, "epoch": 0.38, "learning_rate": 4.808537616229924e-05, "loss": 0.4129, "step": 453, "task_loss": 0.33676445484161377 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2541554868221283, "epoch": 0.38, "learning_rate": 4.808114961961116e-05, "loss": 0.3788, "step": 454, "task_loss": 0.4041786193847656 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1806519329547882, "epoch": 0.38, "learning_rate": 4.8076923076923084e-05, "loss": 0.2849, "step": 455, "task_loss": 0.9338952898979187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37558436393737793, "epoch": 0.39, "learning_rate": 4.8072696534235e-05, "loss": 0.3397, "step": 456, "task_loss": 0.12076132744550705 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48241809010505676, "epoch": 0.39, "learning_rate": 4.8068469991546916e-05, "loss": 0.2795, "step": 457, "task_loss": 0.3814953863620758 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17230990529060364, "epoch": 0.39, "learning_rate": 4.8064243448858836e-05, "loss": 0.4157, "step": 458, "task_loss": 1.0113098621368408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24254800379276276, "epoch": 0.39, "learning_rate": 4.8060016906170756e-05, "loss": 0.362, "step": 459, "task_loss": 0.7341673374176025 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29597097635269165, "epoch": 0.39, "learning_rate": 4.8055790363482676e-05, "loss": 0.3422, "step": 460, "task_loss": 0.7422024607658386 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24189169704914093, "epoch": 0.39, "learning_rate": 4.8051563820794595e-05, "loss": 0.3411, "step": 461, "task_loss": 1.0566450357437134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21869753301143646, "epoch": 0.39, "learning_rate": 4.8047337278106515e-05, "loss": 0.3826, "step": 462, "task_loss": 0.19208063185214996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29792067408561707, "epoch": 0.39, "learning_rate": 4.804311073541843e-05, "loss": 0.3736, "step": 463, "task_loss": 0.3594626188278198 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5067954659461975, "epoch": 0.39, "learning_rate": 4.803888419273035e-05, "loss": 0.3097, "step": 464, "task_loss": 0.3228074312210083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29852721095085144, "epoch": 0.39, "learning_rate": 4.803465765004227e-05, "loss": 0.2871, "step": 465, "task_loss": 0.5256339907646179 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32675912976264954, "epoch": 0.39, "learning_rate": 4.803043110735419e-05, "loss": 0.3054, "step": 466, "task_loss": 0.5943156480789185 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29917216300964355, "epoch": 0.39, "learning_rate": 4.802620456466611e-05, "loss": 0.2851, "step": 467, "task_loss": 0.3273777961730957 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4044845700263977, "epoch": 0.4, "learning_rate": 4.802197802197803e-05, "loss": 0.4042, "step": 468, "task_loss": 0.5378451943397522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12516695261001587, "epoch": 0.4, "learning_rate": 4.801775147928994e-05, "loss": 0.3103, "step": 469, "task_loss": 0.06358525902032852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7075236439704895, "epoch": 0.4, "learning_rate": 4.801352493660186e-05, "loss": 0.4171, "step": 470, "task_loss": 1.1705182790756226 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.367145836353302, "epoch": 0.4, "learning_rate": 4.800929839391378e-05, "loss": 0.3726, "step": 471, "task_loss": 0.8300154805183411 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30779290199279785, "epoch": 0.4, "learning_rate": 4.80050718512257e-05, "loss": 0.3512, "step": 472, "task_loss": 0.9597095251083374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.468344509601593, "epoch": 0.4, "learning_rate": 4.800084530853762e-05, "loss": 0.3587, "step": 473, "task_loss": 0.5425146818161011 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38902226090431213, "epoch": 0.4, "learning_rate": 4.799661876584954e-05, "loss": 0.268, "step": 474, "task_loss": 0.32854801416397095 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2791878283023834, "epoch": 0.4, "learning_rate": 4.799239222316146e-05, "loss": 0.3196, "step": 475, "task_loss": 0.4752495288848877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24759617447853088, "epoch": 0.4, "learning_rate": 4.798816568047337e-05, "loss": 0.3489, "step": 476, "task_loss": 0.7786201238632202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2376200556755066, "epoch": 0.4, "learning_rate": 4.79839391377853e-05, "loss": 0.3212, "step": 477, "task_loss": 0.2219945788383484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4746355712413788, "epoch": 0.4, "learning_rate": 4.797971259509722e-05, "loss": 0.4187, "step": 478, "task_loss": 1.1486724615097046 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38645029067993164, "epoch": 0.4, "learning_rate": 4.797548605240913e-05, "loss": 0.4046, "step": 479, "task_loss": 1.1827445030212402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3958989679813385, "epoch": 0.41, "learning_rate": 4.797125950972105e-05, "loss": 0.333, "step": 480, "task_loss": 0.9585304856300354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.555884599685669, "epoch": 0.41, "learning_rate": 4.796703296703297e-05, "loss": 0.4266, "step": 481, "task_loss": 1.1617844104766846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4958420395851135, "epoch": 0.41, "learning_rate": 4.796280642434488e-05, "loss": 0.4032, "step": 482, "task_loss": 0.5177137851715088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33605822920799255, "epoch": 0.41, "learning_rate": 4.795857988165681e-05, "loss": 0.2613, "step": 483, "task_loss": 0.7914626002311707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23409125208854675, "epoch": 0.41, "learning_rate": 4.795435333896873e-05, "loss": 0.3058, "step": 484, "task_loss": 0.5280863642692566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5291711688041687, "epoch": 0.41, "learning_rate": 4.795012679628064e-05, "loss": 0.433, "step": 485, "task_loss": 1.4136499166488647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23166726529598236, "epoch": 0.41, "learning_rate": 4.794590025359256e-05, "loss": 0.3855, "step": 486, "task_loss": 0.8607439994812012 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2831115126609802, "epoch": 0.41, "learning_rate": 4.794167371090448e-05, "loss": 0.3395, "step": 487, "task_loss": 0.775846540927887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2510896623134613, "epoch": 0.41, "learning_rate": 4.79374471682164e-05, "loss": 0.2651, "step": 488, "task_loss": 0.3774098753929138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3026653528213501, "epoch": 0.41, "learning_rate": 4.793322062552832e-05, "loss": 0.3462, "step": 489, "task_loss": 0.3346295952796936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29465240240097046, "epoch": 0.41, "learning_rate": 4.792899408284024e-05, "loss": 0.3588, "step": 490, "task_loss": 0.2848721742630005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24557900428771973, "epoch": 0.41, "learning_rate": 4.792476754015216e-05, "loss": 0.3079, "step": 491, "task_loss": 0.4794479012489319 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6266154050827026, "epoch": 0.42, "learning_rate": 4.792054099746407e-05, "loss": 0.4814, "step": 492, "task_loss": 0.4172706604003906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.466037392616272, "epoch": 0.42, "learning_rate": 4.791631445477599e-05, "loss": 0.3706, "step": 493, "task_loss": 0.6787412166595459 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28106802701950073, "epoch": 0.42, "learning_rate": 4.791208791208792e-05, "loss": 0.4111, "step": 494, "task_loss": 0.06812220066785812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20570380985736847, "epoch": 0.42, "learning_rate": 4.790786136939983e-05, "loss": 0.3746, "step": 495, "task_loss": 0.35065531730651855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3599362373352051, "epoch": 0.42, "learning_rate": 4.790363482671175e-05, "loss": 0.3066, "step": 496, "task_loss": 0.9745230078697205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4406608045101166, "epoch": 0.42, "learning_rate": 4.789940828402367e-05, "loss": 0.3656, "step": 497, "task_loss": 0.3754790723323822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2744731307029724, "epoch": 0.42, "learning_rate": 4.7895181741335585e-05, "loss": 0.393, "step": 498, "task_loss": 0.4405898451805115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25891149044036865, "epoch": 0.42, "learning_rate": 4.7890955198647505e-05, "loss": 0.2955, "step": 499, "task_loss": 0.5504593849182129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3910437226295471, "epoch": 0.42, "learning_rate": 4.788672865595943e-05, "loss": 0.3011, "step": 500, "task_loss": 0.02430226095020771 }, { "epoch": 0.42, "eval_accuracy": 0.9123960396039604, "eval_loss": 0.19510453939437866, "eval_runtime": 327.4971, "eval_samples_per_second": 77.1, "eval_steps_per_second": 0.605, "step": 500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5021883249282837, "epoch": 0.42, "learning_rate": 4.7882502113271344e-05, "loss": 0.4732, "step": 501, "task_loss": 1.2089918851852417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19148047268390656, "epoch": 0.42, "learning_rate": 4.7878275570583264e-05, "loss": 0.2744, "step": 502, "task_loss": 0.11446057260036469 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27991461753845215, "epoch": 0.42, "learning_rate": 4.7874049027895184e-05, "loss": 0.3474, "step": 503, "task_loss": 0.5374883413314819 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42400461435317993, "epoch": 0.43, "learning_rate": 4.7869822485207103e-05, "loss": 0.4479, "step": 504, "task_loss": 0.6829332113265991 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19470159709453583, "epoch": 0.43, "learning_rate": 4.786559594251902e-05, "loss": 0.2586, "step": 505, "task_loss": 0.402244508266449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3099380135536194, "epoch": 0.43, "learning_rate": 4.786136939983094e-05, "loss": 0.2832, "step": 506, "task_loss": 0.5082167983055115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.482271671295166, "epoch": 0.43, "learning_rate": 4.785714285714286e-05, "loss": 0.3306, "step": 507, "task_loss": 0.6163338422775269 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2019622027873993, "epoch": 0.43, "learning_rate": 4.7852916314454776e-05, "loss": 0.2254, "step": 508, "task_loss": 0.2669321894645691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29598677158355713, "epoch": 0.43, "learning_rate": 4.7848689771766695e-05, "loss": 0.3381, "step": 509, "task_loss": 0.652377724647522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3812240958213806, "epoch": 0.43, "learning_rate": 4.7844463229078615e-05, "loss": 0.3229, "step": 510, "task_loss": 0.361339807510376 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2678496241569519, "epoch": 0.43, "learning_rate": 4.7840236686390535e-05, "loss": 0.3488, "step": 511, "task_loss": 0.5789118409156799 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33221471309661865, "epoch": 0.43, "learning_rate": 4.7836010143702455e-05, "loss": 0.488, "step": 512, "task_loss": 0.6737244725227356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35193681716918945, "epoch": 0.43, "learning_rate": 4.7831783601014374e-05, "loss": 0.4167, "step": 513, "task_loss": 0.20153209567070007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28466469049453735, "epoch": 0.43, "learning_rate": 4.782755705832629e-05, "loss": 0.3329, "step": 514, "task_loss": 0.9720994234085083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32120317220687866, "epoch": 0.44, "learning_rate": 4.782333051563821e-05, "loss": 0.3825, "step": 515, "task_loss": 0.7925823926925659 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2772601842880249, "epoch": 0.44, "learning_rate": 4.781910397295013e-05, "loss": 0.3274, "step": 516, "task_loss": 0.3258647918701172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5394781827926636, "epoch": 0.44, "learning_rate": 4.781487743026205e-05, "loss": 0.3648, "step": 517, "task_loss": 1.1306638717651367 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43012502789497375, "epoch": 0.44, "learning_rate": 4.7810650887573966e-05, "loss": 0.3062, "step": 518, "task_loss": 0.4624011516571045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38749969005584717, "epoch": 0.44, "learning_rate": 4.7806424344885886e-05, "loss": 0.3651, "step": 519, "task_loss": 0.966245710849762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6148722171783447, "epoch": 0.44, "learning_rate": 4.7802197802197806e-05, "loss": 0.3658, "step": 520, "task_loss": 0.24293069541454315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4697817862033844, "epoch": 0.44, "learning_rate": 4.779797125950972e-05, "loss": 0.4882, "step": 521, "task_loss": 0.42285576462745667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2668307423591614, "epoch": 0.44, "learning_rate": 4.7793744716821645e-05, "loss": 0.4171, "step": 522, "task_loss": 0.5063718557357788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21127350628376007, "epoch": 0.44, "learning_rate": 4.7789518174133565e-05, "loss": 0.303, "step": 523, "task_loss": 0.40686243772506714 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5263411998748779, "epoch": 0.44, "learning_rate": 4.778529163144548e-05, "loss": 0.4068, "step": 524, "task_loss": 0.7424903512001038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22469529509544373, "epoch": 0.44, "learning_rate": 4.77810650887574e-05, "loss": 0.3641, "step": 525, "task_loss": 0.3689127266407013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2438812255859375, "epoch": 0.44, "learning_rate": 4.777683854606932e-05, "loss": 0.2921, "step": 526, "task_loss": 0.5155799984931946 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23396344482898712, "epoch": 0.45, "learning_rate": 4.777261200338124e-05, "loss": 0.3225, "step": 527, "task_loss": 0.9596747159957886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39731040596961975, "epoch": 0.45, "learning_rate": 4.776838546069316e-05, "loss": 0.4376, "step": 528, "task_loss": 0.29169049859046936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22641459107398987, "epoch": 0.45, "learning_rate": 4.7764158918005077e-05, "loss": 0.2559, "step": 529, "task_loss": 0.14912928640842438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.10016551613807678, "epoch": 0.45, "learning_rate": 4.775993237531699e-05, "loss": 0.2922, "step": 530, "task_loss": 0.4573501646518707 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28039270639419556, "epoch": 0.45, "learning_rate": 4.775570583262891e-05, "loss": 0.3719, "step": 531, "task_loss": 0.1638871431350708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3260061740875244, "epoch": 0.45, "learning_rate": 4.775147928994083e-05, "loss": 0.4424, "step": 532, "task_loss": 0.6587778329849243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5293354392051697, "epoch": 0.45, "learning_rate": 4.774725274725275e-05, "loss": 0.4823, "step": 533, "task_loss": 0.3607713282108307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2598116993904114, "epoch": 0.45, "learning_rate": 4.774302620456467e-05, "loss": 0.3749, "step": 534, "task_loss": 1.022348165512085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2715606391429901, "epoch": 0.45, "learning_rate": 4.773879966187659e-05, "loss": 0.285, "step": 535, "task_loss": 0.06552083790302277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3325073719024658, "epoch": 0.45, "learning_rate": 4.773457311918851e-05, "loss": 0.3995, "step": 536, "task_loss": 0.3835010826587677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36279869079589844, "epoch": 0.45, "learning_rate": 4.773034657650042e-05, "loss": 0.4089, "step": 537, "task_loss": 0.8075441122055054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17732396721839905, "epoch": 0.45, "learning_rate": 4.772612003381234e-05, "loss": 0.287, "step": 538, "task_loss": 0.7138428092002869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23727980256080627, "epoch": 0.46, "learning_rate": 4.772189349112427e-05, "loss": 0.2274, "step": 539, "task_loss": 0.46086442470550537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3349863290786743, "epoch": 0.46, "learning_rate": 4.771766694843618e-05, "loss": 0.3772, "step": 540, "task_loss": 0.38162460923194885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2596428692340851, "epoch": 0.46, "learning_rate": 4.77134404057481e-05, "loss": 0.4017, "step": 541, "task_loss": 0.4579458236694336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.267270565032959, "epoch": 0.46, "learning_rate": 4.770921386306002e-05, "loss": 0.284, "step": 542, "task_loss": 0.7062729597091675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4117518961429596, "epoch": 0.46, "learning_rate": 4.770498732037193e-05, "loss": 0.298, "step": 543, "task_loss": 0.9053981900215149 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4807129204273224, "epoch": 0.46, "learning_rate": 4.770076077768386e-05, "loss": 0.3457, "step": 544, "task_loss": 0.4797295033931732 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35948461294174194, "epoch": 0.46, "learning_rate": 4.769653423499578e-05, "loss": 0.435, "step": 545, "task_loss": 0.8107247352600098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14028194546699524, "epoch": 0.46, "learning_rate": 4.76923076923077e-05, "loss": 0.2966, "step": 546, "task_loss": 0.24427273869514465 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2708081007003784, "epoch": 0.46, "learning_rate": 4.768808114961961e-05, "loss": 0.3261, "step": 547, "task_loss": 0.3082984387874603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14703203737735748, "epoch": 0.46, "learning_rate": 4.768385460693153e-05, "loss": 0.2915, "step": 548, "task_loss": 0.6045756340026855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2717135548591614, "epoch": 0.46, "learning_rate": 4.767962806424345e-05, "loss": 0.3487, "step": 549, "task_loss": 1.257994532585144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2661055028438568, "epoch": 0.46, "learning_rate": 4.767540152155537e-05, "loss": 0.3511, "step": 550, "task_loss": 1.0356454849243164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20614999532699585, "epoch": 0.47, "learning_rate": 4.767117497886729e-05, "loss": 0.2855, "step": 551, "task_loss": 0.8550474643707275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2185359001159668, "epoch": 0.47, "learning_rate": 4.766694843617921e-05, "loss": 0.3445, "step": 552, "task_loss": 0.8826864957809448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19440582394599915, "epoch": 0.47, "learning_rate": 4.766272189349112e-05, "loss": 0.3682, "step": 553, "task_loss": 1.3656309843063354 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21757423877716064, "epoch": 0.47, "learning_rate": 4.765849535080304e-05, "loss": 0.3358, "step": 554, "task_loss": 0.1205420270562172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3195817470550537, "epoch": 0.47, "learning_rate": 4.765426880811496e-05, "loss": 0.4061, "step": 555, "task_loss": 0.4966970384120941 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24277961254119873, "epoch": 0.47, "learning_rate": 4.765004226542688e-05, "loss": 0.2809, "step": 556, "task_loss": 0.28136998414993286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35525980591773987, "epoch": 0.47, "learning_rate": 4.76458157227388e-05, "loss": 0.4081, "step": 557, "task_loss": 1.4973515272140503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2915230989456177, "epoch": 0.47, "learning_rate": 4.764158918005072e-05, "loss": 0.2908, "step": 558, "task_loss": 0.25230297446250916 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4903471767902374, "epoch": 0.47, "learning_rate": 4.7637362637362635e-05, "loss": 0.3884, "step": 559, "task_loss": 0.5842353105545044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21438221633434296, "epoch": 0.47, "learning_rate": 4.7633136094674555e-05, "loss": 0.347, "step": 560, "task_loss": 0.7824996709823608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3050263524055481, "epoch": 0.47, "learning_rate": 4.762890955198648e-05, "loss": 0.334, "step": 561, "task_loss": 0.6036117076873779 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42564263939857483, "epoch": 0.47, "learning_rate": 4.76246830092984e-05, "loss": 0.47, "step": 562, "task_loss": 0.9398266673088074 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2418982982635498, "epoch": 0.48, "learning_rate": 4.7620456466610314e-05, "loss": 0.3335, "step": 563, "task_loss": 0.7790709137916565 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26156485080718994, "epoch": 0.48, "learning_rate": 4.7616229923922234e-05, "loss": 0.3099, "step": 564, "task_loss": 1.1668341159820557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3860306739807129, "epoch": 0.48, "learning_rate": 4.761200338123415e-05, "loss": 0.3577, "step": 565, "task_loss": 0.4409308135509491 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1832307130098343, "epoch": 0.48, "learning_rate": 4.760777683854607e-05, "loss": 0.2575, "step": 566, "task_loss": 0.09991677850484848 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35417214035987854, "epoch": 0.48, "learning_rate": 4.760355029585799e-05, "loss": 0.4463, "step": 567, "task_loss": 1.2209007740020752 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22337156534194946, "epoch": 0.48, "learning_rate": 4.759932375316991e-05, "loss": 0.4262, "step": 568, "task_loss": 0.05335812270641327 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40127262473106384, "epoch": 0.48, "learning_rate": 4.7595097210481825e-05, "loss": 0.4051, "step": 569, "task_loss": 0.9153675436973572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4061581790447235, "epoch": 0.48, "learning_rate": 4.7590870667793745e-05, "loss": 0.3092, "step": 570, "task_loss": 0.33734214305877686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3070034384727478, "epoch": 0.48, "learning_rate": 4.7586644125105665e-05, "loss": 0.4155, "step": 571, "task_loss": 0.6663563251495361 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4043702185153961, "epoch": 0.48, "learning_rate": 4.7582417582417585e-05, "loss": 0.2791, "step": 572, "task_loss": 0.3064482808113098 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41948115825653076, "epoch": 0.48, "learning_rate": 4.7578191039729504e-05, "loss": 0.3252, "step": 573, "task_loss": 0.902721643447876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19369187951087952, "epoch": 0.48, "learning_rate": 4.7573964497041424e-05, "loss": 0.3187, "step": 574, "task_loss": 0.7382258176803589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2930937111377716, "epoch": 0.49, "learning_rate": 4.7569737954353344e-05, "loss": 0.4316, "step": 575, "task_loss": 0.8327093124389648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4381175637245178, "epoch": 0.49, "learning_rate": 4.756551141166526e-05, "loss": 0.4305, "step": 576, "task_loss": 0.4262275993824005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22403079271316528, "epoch": 0.49, "learning_rate": 4.7561284868977177e-05, "loss": 0.3873, "step": 577, "task_loss": 1.1204639673233032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3349193334579468, "epoch": 0.49, "learning_rate": 4.75570583262891e-05, "loss": 0.4167, "step": 578, "task_loss": 0.880795419216156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23430661857128143, "epoch": 0.49, "learning_rate": 4.7552831783601016e-05, "loss": 0.4672, "step": 579, "task_loss": 1.0049666166305542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2768678665161133, "epoch": 0.49, "learning_rate": 4.7548605240912936e-05, "loss": 0.3531, "step": 580, "task_loss": 1.0239022970199585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15766771137714386, "epoch": 0.49, "learning_rate": 4.7544378698224856e-05, "loss": 0.281, "step": 581, "task_loss": 0.7404004335403442 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3777739405632019, "epoch": 0.49, "learning_rate": 4.754015215553677e-05, "loss": 0.4956, "step": 582, "task_loss": 1.2778314352035522 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24781480431556702, "epoch": 0.49, "learning_rate": 4.7535925612848695e-05, "loss": 0.3808, "step": 583, "task_loss": 0.21571092307567596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1052047461271286, "epoch": 0.49, "learning_rate": 4.7531699070160615e-05, "loss": 0.325, "step": 584, "task_loss": 0.8777510523796082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.44990894198417664, "epoch": 0.49, "learning_rate": 4.752747252747253e-05, "loss": 0.3748, "step": 585, "task_loss": 1.3203763961791992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25098681449890137, "epoch": 0.5, "learning_rate": 4.752324598478445e-05, "loss": 0.3174, "step": 586, "task_loss": 0.18381808698177338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36845919489860535, "epoch": 0.5, "learning_rate": 4.751901944209637e-05, "loss": 0.3516, "step": 587, "task_loss": 0.7729542255401611 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48265665769577026, "epoch": 0.5, "learning_rate": 4.751479289940829e-05, "loss": 0.397, "step": 588, "task_loss": 0.7330112457275391 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22566981613636017, "epoch": 0.5, "learning_rate": 4.751056635672021e-05, "loss": 0.3897, "step": 589, "task_loss": 0.40791866183280945 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38977566361427307, "epoch": 0.5, "learning_rate": 4.7506339814032126e-05, "loss": 0.3694, "step": 590, "task_loss": 1.1916109323501587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22224251925945282, "epoch": 0.5, "learning_rate": 4.7502113271344046e-05, "loss": 0.3209, "step": 591, "task_loss": 0.23281393945217133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22563908994197845, "epoch": 0.5, "learning_rate": 4.749788672865596e-05, "loss": 0.324, "step": 592, "task_loss": 0.2593800723552704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24176111817359924, "epoch": 0.5, "learning_rate": 4.749366018596788e-05, "loss": 0.2747, "step": 593, "task_loss": 0.4232793152332306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14211207628250122, "epoch": 0.5, "learning_rate": 4.74894336432798e-05, "loss": 0.2476, "step": 594, "task_loss": 0.28077563643455505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3431089222431183, "epoch": 0.5, "learning_rate": 4.748520710059172e-05, "loss": 0.3743, "step": 595, "task_loss": 0.33892443776130676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2833075523376465, "epoch": 0.5, "learning_rate": 4.748098055790364e-05, "loss": 0.3038, "step": 596, "task_loss": 0.9755491018295288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18923494219779968, "epoch": 0.5, "learning_rate": 4.747675401521556e-05, "loss": 0.3584, "step": 597, "task_loss": 0.2845441997051239 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46768224239349365, "epoch": 0.51, "learning_rate": 4.747252747252747e-05, "loss": 0.5477, "step": 598, "task_loss": 0.8213875889778137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25206178426742554, "epoch": 0.51, "learning_rate": 4.746830092983939e-05, "loss": 0.3056, "step": 599, "task_loss": 0.5093951225280762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3375788927078247, "epoch": 0.51, "learning_rate": 4.746407438715132e-05, "loss": 0.4649, "step": 600, "task_loss": 1.0816824436187744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20050367712974548, "epoch": 0.51, "learning_rate": 4.745984784446323e-05, "loss": 0.4619, "step": 601, "task_loss": 1.3885235786437988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2921428680419922, "epoch": 0.51, "learning_rate": 4.745562130177515e-05, "loss": 0.4117, "step": 602, "task_loss": 0.6163491606712341 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3945145010948181, "epoch": 0.51, "learning_rate": 4.745139475908707e-05, "loss": 0.2476, "step": 603, "task_loss": 0.5006596446037292 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45314669609069824, "epoch": 0.51, "learning_rate": 4.744716821639899e-05, "loss": 0.3132, "step": 604, "task_loss": 1.1119205951690674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2548198103904724, "epoch": 0.51, "learning_rate": 4.744294167371091e-05, "loss": 0.2579, "step": 605, "task_loss": 0.31535807251930237 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24760308861732483, "epoch": 0.51, "learning_rate": 4.743871513102283e-05, "loss": 0.3738, "step": 606, "task_loss": 0.14562563598155975 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6453937888145447, "epoch": 0.51, "learning_rate": 4.743448858833475e-05, "loss": 0.4029, "step": 607, "task_loss": 0.4509686827659607 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45204368233680725, "epoch": 0.51, "learning_rate": 4.743026204564666e-05, "loss": 0.3934, "step": 608, "task_loss": 0.6939801573753357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2942051291465759, "epoch": 0.51, "learning_rate": 4.742603550295858e-05, "loss": 0.3947, "step": 609, "task_loss": 0.5363163352012634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31975531578063965, "epoch": 0.52, "learning_rate": 4.74218089602705e-05, "loss": 0.3381, "step": 610, "task_loss": 0.5022314786911011 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3351353406906128, "epoch": 0.52, "learning_rate": 4.741758241758242e-05, "loss": 0.3142, "step": 611, "task_loss": 0.8475996851921082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26977065205574036, "epoch": 0.52, "learning_rate": 4.741335587489434e-05, "loss": 0.3987, "step": 612, "task_loss": 0.6088987588882446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2809360921382904, "epoch": 0.52, "learning_rate": 4.740912933220626e-05, "loss": 0.4348, "step": 613, "task_loss": 0.7034056782722473 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2232653945684433, "epoch": 0.52, "learning_rate": 4.740490278951817e-05, "loss": 0.3524, "step": 614, "task_loss": 1.1150834560394287 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.340452641248703, "epoch": 0.52, "learning_rate": 4.740067624683009e-05, "loss": 0.5105, "step": 615, "task_loss": 1.2780228853225708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31792181730270386, "epoch": 0.52, "learning_rate": 4.739644970414201e-05, "loss": 0.3358, "step": 616, "task_loss": 1.0097323656082153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29654550552368164, "epoch": 0.52, "learning_rate": 4.739222316145393e-05, "loss": 0.3331, "step": 617, "task_loss": 1.2019599676132202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24710915982723236, "epoch": 0.52, "learning_rate": 4.738799661876585e-05, "loss": 0.3947, "step": 618, "task_loss": 0.6719022989273071 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34631967544555664, "epoch": 0.52, "learning_rate": 4.738377007607777e-05, "loss": 0.3772, "step": 619, "task_loss": 0.2534567415714264 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3382919132709503, "epoch": 0.52, "learning_rate": 4.737954353338969e-05, "loss": 0.4319, "step": 620, "task_loss": 0.7812601923942566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4298924207687378, "epoch": 0.52, "learning_rate": 4.7375316990701604e-05, "loss": 0.3914, "step": 621, "task_loss": 1.0895129442214966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.7368341088294983, "epoch": 0.53, "learning_rate": 4.737109044801353e-05, "loss": 0.4644, "step": 622, "task_loss": 1.4669815301895142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1839587688446045, "epoch": 0.53, "learning_rate": 4.736686390532545e-05, "loss": 0.2894, "step": 623, "task_loss": 0.10642831772565842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.549903929233551, "epoch": 0.53, "learning_rate": 4.7362637362637364e-05, "loss": 0.4813, "step": 624, "task_loss": 0.5704725384712219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.390815407037735, "epoch": 0.53, "learning_rate": 4.735841081994928e-05, "loss": 0.4957, "step": 625, "task_loss": 0.3729742765426636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17100507020950317, "epoch": 0.53, "learning_rate": 4.73541842772612e-05, "loss": 0.3505, "step": 626, "task_loss": 0.6985848546028137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40018224716186523, "epoch": 0.53, "learning_rate": 4.7349957734573116e-05, "loss": 0.3914, "step": 627, "task_loss": 0.48632675409317017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5129507184028625, "epoch": 0.53, "learning_rate": 4.734573119188504e-05, "loss": 0.4269, "step": 628, "task_loss": 0.9075433611869812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27295151352882385, "epoch": 0.53, "learning_rate": 4.734150464919696e-05, "loss": 0.2895, "step": 629, "task_loss": 0.8488480448722839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4017839729785919, "epoch": 0.53, "learning_rate": 4.7337278106508875e-05, "loss": 0.3767, "step": 630, "task_loss": 1.2542822360992432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36919310688972473, "epoch": 0.53, "learning_rate": 4.7333051563820795e-05, "loss": 0.3212, "step": 631, "task_loss": 0.3986697494983673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34609749913215637, "epoch": 0.53, "learning_rate": 4.7328825021132715e-05, "loss": 0.4112, "step": 632, "task_loss": 0.9559993743896484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4174095392227173, "epoch": 0.53, "learning_rate": 4.7324598478444634e-05, "loss": 0.3243, "step": 633, "task_loss": 0.25843098759651184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24086512625217438, "epoch": 0.54, "learning_rate": 4.7320371935756554e-05, "loss": 0.2703, "step": 634, "task_loss": 0.8146135210990906 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2659895718097687, "epoch": 0.54, "learning_rate": 4.7316145393068474e-05, "loss": 0.3339, "step": 635, "task_loss": 0.4702014625072479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33221834897994995, "epoch": 0.54, "learning_rate": 4.7311918850380394e-05, "loss": 0.3936, "step": 636, "task_loss": 1.457092046737671 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2918318510055542, "epoch": 0.54, "learning_rate": 4.730769230769231e-05, "loss": 0.3259, "step": 637, "task_loss": 0.7226035594940186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33243972063064575, "epoch": 0.54, "learning_rate": 4.7303465765004226e-05, "loss": 0.3405, "step": 638, "task_loss": 1.0920684337615967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.391266793012619, "epoch": 0.54, "learning_rate": 4.729923922231615e-05, "loss": 0.4497, "step": 639, "task_loss": 0.24512942135334015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6080100536346436, "epoch": 0.54, "learning_rate": 4.7295012679628066e-05, "loss": 0.3821, "step": 640, "task_loss": 0.40043750405311584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19417530298233032, "epoch": 0.54, "learning_rate": 4.7290786136939986e-05, "loss": 0.3383, "step": 641, "task_loss": 0.259618878364563 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23771923780441284, "epoch": 0.54, "learning_rate": 4.7286559594251905e-05, "loss": 0.3528, "step": 642, "task_loss": 0.9821199774742126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26665687561035156, "epoch": 0.54, "learning_rate": 4.728233305156382e-05, "loss": 0.4123, "step": 643, "task_loss": 0.9828779697418213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2349528670310974, "epoch": 0.54, "learning_rate": 4.727810650887574e-05, "loss": 0.3125, "step": 644, "task_loss": 0.5495439171791077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37252551317214966, "epoch": 0.54, "learning_rate": 4.7273879966187665e-05, "loss": 0.423, "step": 645, "task_loss": 0.6340850591659546 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3871510922908783, "epoch": 0.55, "learning_rate": 4.726965342349958e-05, "loss": 0.4527, "step": 646, "task_loss": 0.31135350465774536 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40592512488365173, "epoch": 0.55, "learning_rate": 4.72654268808115e-05, "loss": 0.365, "step": 647, "task_loss": 0.35286909341812134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19760552048683167, "epoch": 0.55, "learning_rate": 4.726120033812342e-05, "loss": 0.4602, "step": 648, "task_loss": 0.2875680923461914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2774466872215271, "epoch": 0.55, "learning_rate": 4.725697379543534e-05, "loss": 0.3009, "step": 649, "task_loss": 0.40192073583602905 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.386837363243103, "epoch": 0.55, "learning_rate": 4.7252747252747257e-05, "loss": 0.3292, "step": 650, "task_loss": 0.8110374212265015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2615984380245209, "epoch": 0.55, "learning_rate": 4.7248520710059176e-05, "loss": 0.342, "step": 651, "task_loss": 0.7546160221099854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26061612367630005, "epoch": 0.55, "learning_rate": 4.7244294167371096e-05, "loss": 0.292, "step": 652, "task_loss": 0.5437497496604919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27350395917892456, "epoch": 0.55, "learning_rate": 4.724006762468301e-05, "loss": 0.3561, "step": 653, "task_loss": 0.23291213810443878 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.317546546459198, "epoch": 0.55, "learning_rate": 4.723584108199493e-05, "loss": 0.3753, "step": 654, "task_loss": 0.5752608776092529 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6529425382614136, "epoch": 0.55, "learning_rate": 4.723161453930685e-05, "loss": 0.3672, "step": 655, "task_loss": 0.07581650465726852 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29290971159935, "epoch": 0.55, "learning_rate": 4.722738799661877e-05, "loss": 0.449, "step": 656, "task_loss": 0.593906819820404 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22481581568717957, "epoch": 0.56, "learning_rate": 4.722316145393069e-05, "loss": 0.3504, "step": 657, "task_loss": 0.557009220123291 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28708481788635254, "epoch": 0.56, "learning_rate": 4.721893491124261e-05, "loss": 0.3644, "step": 658, "task_loss": 0.6461619734764099 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3660014271736145, "epoch": 0.56, "learning_rate": 4.721470836855452e-05, "loss": 0.5447, "step": 659, "task_loss": 0.8711457252502441 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1999700367450714, "epoch": 0.56, "learning_rate": 4.721048182586644e-05, "loss": 0.4082, "step": 660, "task_loss": 0.39707931876182556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6267917156219482, "epoch": 0.56, "learning_rate": 4.720625528317836e-05, "loss": 0.5217, "step": 661, "task_loss": 0.8283354043960571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40910154581069946, "epoch": 0.56, "learning_rate": 4.7202028740490287e-05, "loss": 0.4229, "step": 662, "task_loss": 0.5927056670188904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2516242563724518, "epoch": 0.56, "learning_rate": 4.71978021978022e-05, "loss": 0.3609, "step": 663, "task_loss": 0.14902153611183167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.333596408367157, "epoch": 0.56, "learning_rate": 4.719357565511412e-05, "loss": 0.5049, "step": 664, "task_loss": 0.3535808324813843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3168475031852722, "epoch": 0.56, "learning_rate": 4.718934911242604e-05, "loss": 0.3079, "step": 665, "task_loss": 0.6552111506462097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33432474732398987, "epoch": 0.56, "learning_rate": 4.718512256973795e-05, "loss": 0.2977, "step": 666, "task_loss": 0.39099422097206116 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39753422141075134, "epoch": 0.56, "learning_rate": 4.718089602704988e-05, "loss": 0.3301, "step": 667, "task_loss": 0.47903263568878174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3049262762069702, "epoch": 0.56, "learning_rate": 4.71766694843618e-05, "loss": 0.4022, "step": 668, "task_loss": 0.6022108793258667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35491737723350525, "epoch": 0.57, "learning_rate": 4.717244294167371e-05, "loss": 0.3691, "step": 669, "task_loss": 0.7374735474586487 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2542036771774292, "epoch": 0.57, "learning_rate": 4.716821639898563e-05, "loss": 0.2639, "step": 670, "task_loss": 0.33632931113243103 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45093709230422974, "epoch": 0.57, "learning_rate": 4.716398985629755e-05, "loss": 0.3916, "step": 671, "task_loss": 1.0387479066848755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47321417927742004, "epoch": 0.57, "learning_rate": 4.715976331360947e-05, "loss": 0.411, "step": 672, "task_loss": 0.6558405756950378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37407171726226807, "epoch": 0.57, "learning_rate": 4.715553677092139e-05, "loss": 0.3168, "step": 673, "task_loss": 1.0233266353607178 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20857971906661987, "epoch": 0.57, "learning_rate": 4.715131022823331e-05, "loss": 0.3295, "step": 674, "task_loss": 0.37332332134246826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5774608254432678, "epoch": 0.57, "learning_rate": 4.714708368554522e-05, "loss": 0.525, "step": 675, "task_loss": 1.3238980770111084 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22712036967277527, "epoch": 0.57, "learning_rate": 4.714285714285714e-05, "loss": 0.4051, "step": 676, "task_loss": 0.6897934079170227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17959004640579224, "epoch": 0.57, "learning_rate": 4.713863060016906e-05, "loss": 0.3194, "step": 677, "task_loss": 0.3948770761489868 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2370845079421997, "epoch": 0.57, "learning_rate": 4.713440405748098e-05, "loss": 0.3015, "step": 678, "task_loss": 0.1519782990217209 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2547301948070526, "epoch": 0.57, "learning_rate": 4.71301775147929e-05, "loss": 0.3392, "step": 679, "task_loss": 0.42188867926597595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3425919711589813, "epoch": 0.57, "learning_rate": 4.712595097210482e-05, "loss": 0.2653, "step": 680, "task_loss": 0.8486734628677368 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28201133012771606, "epoch": 0.58, "learning_rate": 4.712172442941674e-05, "loss": 0.3232, "step": 681, "task_loss": 0.21899619698524475 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1665186733007431, "epoch": 0.58, "learning_rate": 4.7117497886728654e-05, "loss": 0.3467, "step": 682, "task_loss": 0.3213064968585968 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.553528904914856, "epoch": 0.58, "learning_rate": 4.7113271344040574e-05, "loss": 0.396, "step": 683, "task_loss": 0.5959455966949463 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20433126389980316, "epoch": 0.58, "learning_rate": 4.71090448013525e-05, "loss": 0.274, "step": 684, "task_loss": 0.6436899304389954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20822347700595856, "epoch": 0.58, "learning_rate": 4.7104818258664413e-05, "loss": 0.3299, "step": 685, "task_loss": 0.4979240596294403 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46341797709465027, "epoch": 0.58, "learning_rate": 4.710059171597633e-05, "loss": 0.3165, "step": 686, "task_loss": 0.1802392452955246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34004417061805725, "epoch": 0.58, "learning_rate": 4.709636517328825e-05, "loss": 0.3236, "step": 687, "task_loss": 0.9467293620109558 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1860482394695282, "epoch": 0.58, "learning_rate": 4.7092138630600166e-05, "loss": 0.2697, "step": 688, "task_loss": 0.7974063754081726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4567367732524872, "epoch": 0.58, "learning_rate": 4.708791208791209e-05, "loss": 0.3866, "step": 689, "task_loss": 0.4629192650318146 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1610099971294403, "epoch": 0.58, "learning_rate": 4.708368554522401e-05, "loss": 0.2618, "step": 690, "task_loss": 0.3629488945007324 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.322256475687027, "epoch": 0.58, "learning_rate": 4.707945900253593e-05, "loss": 0.3583, "step": 691, "task_loss": 0.43364420533180237 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5227921009063721, "epoch": 0.58, "learning_rate": 4.7075232459847845e-05, "loss": 0.4195, "step": 692, "task_loss": 1.0156371593475342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3206583857536316, "epoch": 0.59, "learning_rate": 4.7071005917159765e-05, "loss": 0.4679, "step": 693, "task_loss": 1.4880452156066895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6341646313667297, "epoch": 0.59, "learning_rate": 4.7066779374471684e-05, "loss": 0.3936, "step": 694, "task_loss": 0.5662680864334106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3810037076473236, "epoch": 0.59, "learning_rate": 4.7062552831783604e-05, "loss": 0.3505, "step": 695, "task_loss": 1.1134657859802246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2382390946149826, "epoch": 0.59, "learning_rate": 4.7058326289095524e-05, "loss": 0.3228, "step": 696, "task_loss": 0.4972875118255615 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5607861280441284, "epoch": 0.59, "learning_rate": 4.7054099746407444e-05, "loss": 0.4341, "step": 697, "task_loss": 0.9519200325012207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49214595556259155, "epoch": 0.59, "learning_rate": 4.7049873203719357e-05, "loss": 0.4335, "step": 698, "task_loss": 0.3133102357387543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3421843647956848, "epoch": 0.59, "learning_rate": 4.7045646661031276e-05, "loss": 0.434, "step": 699, "task_loss": 0.6254091858863831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2969176471233368, "epoch": 0.59, "learning_rate": 4.7041420118343196e-05, "loss": 0.4202, "step": 700, "task_loss": 0.47923919558525085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2680771052837372, "epoch": 0.59, "learning_rate": 4.7037193575655116e-05, "loss": 0.4648, "step": 701, "task_loss": 0.5146772861480713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26482564210891724, "epoch": 0.59, "learning_rate": 4.7032967032967035e-05, "loss": 0.3071, "step": 702, "task_loss": 0.809281051158905 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36773034930229187, "epoch": 0.59, "learning_rate": 4.7028740490278955e-05, "loss": 0.3868, "step": 703, "task_loss": 0.6665106415748596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18537673354148865, "epoch": 0.59, "learning_rate": 4.702451394759087e-05, "loss": 0.3274, "step": 704, "task_loss": 1.5008612871170044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45086485147476196, "epoch": 0.6, "learning_rate": 4.702028740490279e-05, "loss": 0.369, "step": 705, "task_loss": 1.3042993545532227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19615305960178375, "epoch": 0.6, "learning_rate": 4.7016060862214714e-05, "loss": 0.3271, "step": 706, "task_loss": 0.33956247568130493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.236154705286026, "epoch": 0.6, "learning_rate": 4.7011834319526634e-05, "loss": 0.3975, "step": 707, "task_loss": 0.5376070141792297 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25039881467819214, "epoch": 0.6, "learning_rate": 4.700760777683855e-05, "loss": 0.3244, "step": 708, "task_loss": 0.390200674533844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1740414947271347, "epoch": 0.6, "learning_rate": 4.700338123415047e-05, "loss": 0.5561, "step": 709, "task_loss": 0.3755144774913788 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24118320643901825, "epoch": 0.6, "learning_rate": 4.6999154691462387e-05, "loss": 0.3875, "step": 710, "task_loss": 0.849571943283081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2488633245229721, "epoch": 0.6, "learning_rate": 4.6994928148774306e-05, "loss": 0.3628, "step": 711, "task_loss": 1.3228331804275513 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40403005480766296, "epoch": 0.6, "learning_rate": 4.6990701606086226e-05, "loss": 0.4502, "step": 712, "task_loss": 0.7019414305686951 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23417091369628906, "epoch": 0.6, "learning_rate": 4.6986475063398146e-05, "loss": 0.3034, "step": 713, "task_loss": 0.18903414905071259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35916635394096375, "epoch": 0.6, "learning_rate": 4.698224852071006e-05, "loss": 0.405, "step": 714, "task_loss": 0.11952552944421768 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18628966808319092, "epoch": 0.6, "learning_rate": 4.697802197802198e-05, "loss": 0.2843, "step": 715, "task_loss": 0.5236517786979675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29364699125289917, "epoch": 0.6, "learning_rate": 4.69737954353339e-05, "loss": 0.3668, "step": 716, "task_loss": 0.9426444172859192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2838394045829773, "epoch": 0.61, "learning_rate": 4.696956889264582e-05, "loss": 0.4446, "step": 717, "task_loss": 0.7164893746376038 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2804027199745178, "epoch": 0.61, "learning_rate": 4.696534234995774e-05, "loss": 0.3231, "step": 718, "task_loss": 0.24467986822128296 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2521743178367615, "epoch": 0.61, "learning_rate": 4.696111580726966e-05, "loss": 0.3527, "step": 719, "task_loss": 1.05292809009552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46222758293151855, "epoch": 0.61, "learning_rate": 4.695688926458158e-05, "loss": 0.5009, "step": 720, "task_loss": 0.960830569267273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46111834049224854, "epoch": 0.61, "learning_rate": 4.695266272189349e-05, "loss": 0.3516, "step": 721, "task_loss": 0.8451549410820007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3342827558517456, "epoch": 0.61, "learning_rate": 4.694843617920541e-05, "loss": 0.3175, "step": 722, "task_loss": 0.5725528597831726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20860952138900757, "epoch": 0.61, "learning_rate": 4.6944209636517336e-05, "loss": 0.3185, "step": 723, "task_loss": 0.2770271897315979 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31454330682754517, "epoch": 0.61, "learning_rate": 4.693998309382925e-05, "loss": 0.4121, "step": 724, "task_loss": 0.6683022975921631 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5281179547309875, "epoch": 0.61, "learning_rate": 4.693575655114117e-05, "loss": 0.3287, "step": 725, "task_loss": 1.185948133468628 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41321808099746704, "epoch": 0.61, "learning_rate": 4.693153000845309e-05, "loss": 0.4766, "step": 726, "task_loss": 0.46217796206474304 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29537543654441833, "epoch": 0.61, "learning_rate": 4.6927303465765e-05, "loss": 0.3582, "step": 727, "task_loss": 0.5150099396705627 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22204221785068512, "epoch": 0.61, "learning_rate": 4.692307692307693e-05, "loss": 0.364, "step": 728, "task_loss": 0.47867512702941895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22438563406467438, "epoch": 0.62, "learning_rate": 4.691885038038885e-05, "loss": 0.3536, "step": 729, "task_loss": 0.4347574710845947 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16139662265777588, "epoch": 0.62, "learning_rate": 4.691462383770076e-05, "loss": 0.3303, "step": 730, "task_loss": 0.3739808201789856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22806861996650696, "epoch": 0.62, "learning_rate": 4.691039729501268e-05, "loss": 0.369, "step": 731, "task_loss": 0.6877012252807617 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3352641463279724, "epoch": 0.62, "learning_rate": 4.69061707523246e-05, "loss": 0.3955, "step": 732, "task_loss": 1.2132909297943115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3059541583061218, "epoch": 0.62, "learning_rate": 4.690194420963652e-05, "loss": 0.4438, "step": 733, "task_loss": 1.191922903060913 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43167826533317566, "epoch": 0.62, "learning_rate": 4.689771766694844e-05, "loss": 0.3755, "step": 734, "task_loss": 0.6345545053482056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.354397177696228, "epoch": 0.62, "learning_rate": 4.689349112426036e-05, "loss": 0.4559, "step": 735, "task_loss": 0.4371297359466553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1969318389892578, "epoch": 0.62, "learning_rate": 4.688926458157228e-05, "loss": 0.2935, "step": 736, "task_loss": 0.4778875708580017 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3992832899093628, "epoch": 0.62, "learning_rate": 4.688503803888419e-05, "loss": 0.2957, "step": 737, "task_loss": 0.18934839963912964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1824398785829544, "epoch": 0.62, "learning_rate": 4.688081149619611e-05, "loss": 0.3074, "step": 738, "task_loss": 0.06955501437187195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2365836352109909, "epoch": 0.62, "learning_rate": 4.687658495350803e-05, "loss": 0.3694, "step": 739, "task_loss": 0.6758744120597839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23081645369529724, "epoch": 0.63, "learning_rate": 4.687235841081995e-05, "loss": 0.2714, "step": 740, "task_loss": 0.8412255048751831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3754105567932129, "epoch": 0.63, "learning_rate": 4.686813186813187e-05, "loss": 0.3583, "step": 741, "task_loss": 0.039537061005830765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48759883642196655, "epoch": 0.63, "learning_rate": 4.686390532544379e-05, "loss": 0.403, "step": 742, "task_loss": 0.6773413419723511 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38453003764152527, "epoch": 0.63, "learning_rate": 4.6859678782755704e-05, "loss": 0.4022, "step": 743, "task_loss": 1.2605183124542236 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3999258875846863, "epoch": 0.63, "learning_rate": 4.6855452240067624e-05, "loss": 0.3801, "step": 744, "task_loss": 1.3958191871643066 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46052882075309753, "epoch": 0.63, "learning_rate": 4.685122569737955e-05, "loss": 0.3765, "step": 745, "task_loss": 0.43500369787216187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19625413417816162, "epoch": 0.63, "learning_rate": 4.684699915469146e-05, "loss": 0.3736, "step": 746, "task_loss": 0.803209125995636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26730993390083313, "epoch": 0.63, "learning_rate": 4.684277261200338e-05, "loss": 0.3664, "step": 747, "task_loss": 0.3881904184818268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5900534391403198, "epoch": 0.63, "learning_rate": 4.68385460693153e-05, "loss": 0.4657, "step": 748, "task_loss": 0.9224446415901184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4307066798210144, "epoch": 0.63, "learning_rate": 4.683431952662722e-05, "loss": 0.3378, "step": 749, "task_loss": 0.3119797110557556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3169203996658325, "epoch": 0.63, "learning_rate": 4.683009298393914e-05, "loss": 0.4756, "step": 750, "task_loss": 0.44903531670570374 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4722621440887451, "epoch": 0.63, "learning_rate": 4.682586644125106e-05, "loss": 0.3441, "step": 751, "task_loss": 0.7778240442276001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21134699881076813, "epoch": 0.64, "learning_rate": 4.682163989856298e-05, "loss": 0.3761, "step": 752, "task_loss": 0.2646298408508301 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3201712965965271, "epoch": 0.64, "learning_rate": 4.6817413355874895e-05, "loss": 0.3234, "step": 753, "task_loss": 1.1199707984924316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2911157011985779, "epoch": 0.64, "learning_rate": 4.6813186813186814e-05, "loss": 0.3709, "step": 754, "task_loss": 0.6298675537109375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2521316409111023, "epoch": 0.64, "learning_rate": 4.6808960270498734e-05, "loss": 0.273, "step": 755, "task_loss": 0.5158833861351013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11915340274572372, "epoch": 0.64, "learning_rate": 4.6804733727810654e-05, "loss": 0.2958, "step": 756, "task_loss": 0.5755556225776672 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2479076087474823, "epoch": 0.64, "learning_rate": 4.6800507185122574e-05, "loss": 0.3772, "step": 757, "task_loss": 0.3620893061161041 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3179338276386261, "epoch": 0.64, "learning_rate": 4.6796280642434493e-05, "loss": 0.337, "step": 758, "task_loss": 0.24180671572685242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13410022854804993, "epoch": 0.64, "learning_rate": 4.6792054099746406e-05, "loss": 0.2723, "step": 759, "task_loss": 0.0071316249668598175 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33512306213378906, "epoch": 0.64, "learning_rate": 4.6787827557058326e-05, "loss": 0.3455, "step": 760, "task_loss": 0.4476659297943115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1407025307416916, "epoch": 0.64, "learning_rate": 4.6783601014370246e-05, "loss": 0.3167, "step": 761, "task_loss": 0.5032293200492859 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34007760882377625, "epoch": 0.64, "learning_rate": 4.6779374471682166e-05, "loss": 0.2459, "step": 762, "task_loss": 1.0502866506576538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34080156683921814, "epoch": 0.64, "learning_rate": 4.6775147928994085e-05, "loss": 0.4203, "step": 763, "task_loss": 0.1957993507385254 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40594416856765747, "epoch": 0.65, "learning_rate": 4.6770921386306005e-05, "loss": 0.3926, "step": 764, "task_loss": 0.4052309989929199 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32644131779670715, "epoch": 0.65, "learning_rate": 4.6766694843617925e-05, "loss": 0.329, "step": 765, "task_loss": 0.18310363590717316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22191733121871948, "epoch": 0.65, "learning_rate": 4.676246830092984e-05, "loss": 0.3957, "step": 766, "task_loss": 0.4479212462902069 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3019043803215027, "epoch": 0.65, "learning_rate": 4.6758241758241764e-05, "loss": 0.2918, "step": 767, "task_loss": 0.9111586809158325 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24184313416481018, "epoch": 0.65, "learning_rate": 4.6754015215553684e-05, "loss": 0.3361, "step": 768, "task_loss": 0.36255335807800293 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25905492901802063, "epoch": 0.65, "learning_rate": 4.67497886728656e-05, "loss": 0.3851, "step": 769, "task_loss": 1.5149391889572144 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32056835293769836, "epoch": 0.65, "learning_rate": 4.674556213017752e-05, "loss": 0.426, "step": 770, "task_loss": 0.435532808303833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28691959381103516, "epoch": 0.65, "learning_rate": 4.6741335587489436e-05, "loss": 0.3853, "step": 771, "task_loss": 0.24572913348674774 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.362898588180542, "epoch": 0.65, "learning_rate": 4.673710904480135e-05, "loss": 0.3312, "step": 772, "task_loss": 0.875820517539978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26760318875312805, "epoch": 0.65, "learning_rate": 4.6732882502113276e-05, "loss": 0.372, "step": 773, "task_loss": 0.4021333158016205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2651258111000061, "epoch": 0.65, "learning_rate": 4.6728655959425196e-05, "loss": 0.2993, "step": 774, "task_loss": 1.2189784049987793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2317265421152115, "epoch": 0.65, "learning_rate": 4.672442941673711e-05, "loss": 0.289, "step": 775, "task_loss": 0.8031799793243408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39754974842071533, "epoch": 0.66, "learning_rate": 4.672020287404903e-05, "loss": 0.3329, "step": 776, "task_loss": 0.6385886669158936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14862455427646637, "epoch": 0.66, "learning_rate": 4.671597633136095e-05, "loss": 0.2246, "step": 777, "task_loss": 0.12774179875850677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4029311537742615, "epoch": 0.66, "learning_rate": 4.671174978867287e-05, "loss": 0.3308, "step": 778, "task_loss": 0.7680000066757202 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31030893325805664, "epoch": 0.66, "learning_rate": 4.670752324598479e-05, "loss": 0.3138, "step": 779, "task_loss": 0.5516699552536011 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28797948360443115, "epoch": 0.66, "learning_rate": 4.670329670329671e-05, "loss": 0.2463, "step": 780, "task_loss": 0.27283746004104614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22052887082099915, "epoch": 0.66, "learning_rate": 4.669907016060863e-05, "loss": 0.3427, "step": 781, "task_loss": 0.666221022605896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48894840478897095, "epoch": 0.66, "learning_rate": 4.669484361792054e-05, "loss": 0.3651, "step": 782, "task_loss": 1.259474515914917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3072489798069, "epoch": 0.66, "learning_rate": 4.669061707523246e-05, "loss": 0.2968, "step": 783, "task_loss": 0.22879329323768616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35828760266304016, "epoch": 0.66, "learning_rate": 4.668639053254438e-05, "loss": 0.4091, "step": 784, "task_loss": 0.590131402015686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2617449164390564, "epoch": 0.66, "learning_rate": 4.66821639898563e-05, "loss": 0.3515, "step": 785, "task_loss": 0.5642703175544739 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19134745001792908, "epoch": 0.66, "learning_rate": 4.667793744716822e-05, "loss": 0.405, "step": 786, "task_loss": 0.09782329946756363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29645299911499023, "epoch": 0.66, "learning_rate": 4.667371090448014e-05, "loss": 0.3337, "step": 787, "task_loss": 0.527434229850769 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2847943902015686, "epoch": 0.67, "learning_rate": 4.666948436179205e-05, "loss": 0.2979, "step": 788, "task_loss": 0.2787574231624603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1994607150554657, "epoch": 0.67, "learning_rate": 4.666525781910397e-05, "loss": 0.277, "step": 789, "task_loss": 0.43980374932289124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18303309381008148, "epoch": 0.67, "learning_rate": 4.66610312764159e-05, "loss": 0.2804, "step": 790, "task_loss": 0.9812135100364685 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5979506969451904, "epoch": 0.67, "learning_rate": 4.665680473372781e-05, "loss": 0.4914, "step": 791, "task_loss": 0.4102175533771515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2620372176170349, "epoch": 0.67, "learning_rate": 4.665257819103973e-05, "loss": 0.3372, "step": 792, "task_loss": 0.9842884540557861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4323297142982483, "epoch": 0.67, "learning_rate": 4.664835164835165e-05, "loss": 0.3887, "step": 793, "task_loss": 0.7159919142723083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3561616539955139, "epoch": 0.67, "learning_rate": 4.664412510566357e-05, "loss": 0.3872, "step": 794, "task_loss": 0.49123963713645935 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28778600692749023, "epoch": 0.67, "learning_rate": 4.663989856297549e-05, "loss": 0.4273, "step": 795, "task_loss": 0.2565106451511383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14981184899806976, "epoch": 0.67, "learning_rate": 4.663567202028741e-05, "loss": 0.2308, "step": 796, "task_loss": 0.424303263425827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24332532286643982, "epoch": 0.67, "learning_rate": 4.663144547759933e-05, "loss": 0.317, "step": 797, "task_loss": 0.6541764140129089 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4180551767349243, "epoch": 0.67, "learning_rate": 4.662721893491124e-05, "loss": 0.3242, "step": 798, "task_loss": 1.287262201309204 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2564396858215332, "epoch": 0.67, "learning_rate": 4.662299239222316e-05, "loss": 0.4082, "step": 799, "task_loss": 0.5781082510948181 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15584638714790344, "epoch": 0.68, "learning_rate": 4.661876584953508e-05, "loss": 0.2786, "step": 800, "task_loss": 0.6881094574928284 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23558972775936127, "epoch": 0.68, "learning_rate": 4.6614539306847e-05, "loss": 0.2289, "step": 801, "task_loss": 0.1461523473262787 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.304515540599823, "epoch": 0.68, "learning_rate": 4.661031276415892e-05, "loss": 0.4035, "step": 802, "task_loss": 0.5185538530349731 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2558799386024475, "epoch": 0.68, "learning_rate": 4.660608622147084e-05, "loss": 0.4876, "step": 803, "task_loss": 0.6770008206367493 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2675953209400177, "epoch": 0.68, "learning_rate": 4.6601859678782754e-05, "loss": 0.2623, "step": 804, "task_loss": 1.2368402481079102 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3183937072753906, "epoch": 0.68, "learning_rate": 4.6597633136094674e-05, "loss": 0.3384, "step": 805, "task_loss": 0.8166303634643555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25635120272636414, "epoch": 0.68, "learning_rate": 4.6593406593406593e-05, "loss": 0.3408, "step": 806, "task_loss": 0.3825168311595917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2623569369316101, "epoch": 0.68, "learning_rate": 4.658918005071852e-05, "loss": 0.2882, "step": 807, "task_loss": 0.23788872361183167 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4122350811958313, "epoch": 0.68, "learning_rate": 4.658495350803043e-05, "loss": 0.3606, "step": 808, "task_loss": 0.8126048445701599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48980242013931274, "epoch": 0.68, "learning_rate": 4.658072696534235e-05, "loss": 0.2975, "step": 809, "task_loss": 1.1203809976577759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.49316003918647766, "epoch": 0.68, "learning_rate": 4.657650042265427e-05, "loss": 0.3358, "step": 810, "task_loss": 0.8615176677703857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1466982662677765, "epoch": 0.69, "learning_rate": 4.6572273879966185e-05, "loss": 0.331, "step": 811, "task_loss": 0.013799971900880337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18643391132354736, "epoch": 0.69, "learning_rate": 4.656804733727811e-05, "loss": 0.3577, "step": 812, "task_loss": 0.11287066340446472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2176889330148697, "epoch": 0.69, "learning_rate": 4.656382079459003e-05, "loss": 0.2355, "step": 813, "task_loss": 0.2188456952571869 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30954548716545105, "epoch": 0.69, "learning_rate": 4.6559594251901945e-05, "loss": 0.3371, "step": 814, "task_loss": 0.6223635673522949 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17586016654968262, "epoch": 0.69, "learning_rate": 4.6555367709213864e-05, "loss": 0.4006, "step": 815, "task_loss": 0.047391582280397415 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2198064625263214, "epoch": 0.69, "learning_rate": 4.6551141166525784e-05, "loss": 0.3579, "step": 816, "task_loss": 0.18386153876781464 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19883403182029724, "epoch": 0.69, "learning_rate": 4.6546914623837704e-05, "loss": 0.2644, "step": 817, "task_loss": 0.8972535133361816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3488885760307312, "epoch": 0.69, "learning_rate": 4.6542688081149624e-05, "loss": 0.3515, "step": 818, "task_loss": 0.8315311670303345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34419578313827515, "epoch": 0.69, "learning_rate": 4.653846153846154e-05, "loss": 0.2717, "step": 819, "task_loss": 0.7635257840156555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3127756118774414, "epoch": 0.69, "learning_rate": 4.6534234995773456e-05, "loss": 0.327, "step": 820, "task_loss": 0.28192371129989624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17077802121639252, "epoch": 0.69, "learning_rate": 4.6530008453085376e-05, "loss": 0.2726, "step": 821, "task_loss": 0.5848979353904724 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3321095108985901, "epoch": 0.69, "learning_rate": 4.6525781910397296e-05, "loss": 0.3545, "step": 822, "task_loss": 1.1666940450668335 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17902451753616333, "epoch": 0.7, "learning_rate": 4.6521555367709215e-05, "loss": 0.3196, "step": 823, "task_loss": 0.2585326135158539 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2681087255477905, "epoch": 0.7, "learning_rate": 4.6517328825021135e-05, "loss": 0.3005, "step": 824, "task_loss": 0.5993656516075134 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17954730987548828, "epoch": 0.7, "learning_rate": 4.6513102282333055e-05, "loss": 0.2502, "step": 825, "task_loss": 0.31453436613082886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2539949417114258, "epoch": 0.7, "learning_rate": 4.6508875739644975e-05, "loss": 0.2447, "step": 826, "task_loss": 0.5321019291877747 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4174681603908539, "epoch": 0.7, "learning_rate": 4.650464919695689e-05, "loss": 0.3154, "step": 827, "task_loss": 0.7458959817886353 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46995866298675537, "epoch": 0.7, "learning_rate": 4.650042265426881e-05, "loss": 0.3563, "step": 828, "task_loss": 0.34809648990631104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3645036816596985, "epoch": 0.7, "learning_rate": 4.6496196111580734e-05, "loss": 0.3703, "step": 829, "task_loss": 0.5023232698440552 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5025608539581299, "epoch": 0.7, "learning_rate": 4.649196956889265e-05, "loss": 0.3532, "step": 830, "task_loss": 0.3286297619342804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4966927468776703, "epoch": 0.7, "learning_rate": 4.6487743026204567e-05, "loss": 0.3588, "step": 831, "task_loss": 1.2833762168884277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2537088394165039, "epoch": 0.7, "learning_rate": 4.6483516483516486e-05, "loss": 0.2375, "step": 832, "task_loss": 0.506493091583252 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4204622507095337, "epoch": 0.7, "learning_rate": 4.64792899408284e-05, "loss": 0.4207, "step": 833, "task_loss": 1.147786259651184 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4535294771194458, "epoch": 0.7, "learning_rate": 4.6475063398140326e-05, "loss": 0.2769, "step": 834, "task_loss": 0.2901158928871155 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18148492276668549, "epoch": 0.71, "learning_rate": 4.6470836855452246e-05, "loss": 0.298, "step": 835, "task_loss": 0.49322766065597534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2675839066505432, "epoch": 0.71, "learning_rate": 4.6466610312764165e-05, "loss": 0.2872, "step": 836, "task_loss": 0.7758223414421082 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4015640914440155, "epoch": 0.71, "learning_rate": 4.646238377007608e-05, "loss": 0.3793, "step": 837, "task_loss": 0.6204982995986938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2344849705696106, "epoch": 0.71, "learning_rate": 4.6458157227388e-05, "loss": 0.3693, "step": 838, "task_loss": 0.8100611567497253 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4160458743572235, "epoch": 0.71, "learning_rate": 4.645393068469992e-05, "loss": 0.3833, "step": 839, "task_loss": 0.20661017298698425 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3408738374710083, "epoch": 0.71, "learning_rate": 4.644970414201184e-05, "loss": 0.3255, "step": 840, "task_loss": 0.8161397576332092 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28762492537498474, "epoch": 0.71, "learning_rate": 4.644547759932376e-05, "loss": 0.4359, "step": 841, "task_loss": 0.6441768407821655 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4925372004508972, "epoch": 0.71, "learning_rate": 4.644125105663568e-05, "loss": 0.4242, "step": 842, "task_loss": 0.5591533184051514 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2857370674610138, "epoch": 0.71, "learning_rate": 4.643702451394759e-05, "loss": 0.3492, "step": 843, "task_loss": 0.14419540762901306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1425875723361969, "epoch": 0.71, "learning_rate": 4.643279797125951e-05, "loss": 0.2699, "step": 844, "task_loss": 0.029374532401561737 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2444223165512085, "epoch": 0.71, "learning_rate": 4.642857142857143e-05, "loss": 0.2677, "step": 845, "task_loss": 0.38509276509284973 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5488537549972534, "epoch": 0.71, "learning_rate": 4.642434488588335e-05, "loss": 0.4558, "step": 846, "task_loss": 0.3536800444126129 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18831950426101685, "epoch": 0.72, "learning_rate": 4.642011834319527e-05, "loss": 0.2165, "step": 847, "task_loss": 1.4712738990783691 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20634722709655762, "epoch": 0.72, "learning_rate": 4.641589180050719e-05, "loss": 0.3429, "step": 848, "task_loss": 0.8899732828140259 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48802900314331055, "epoch": 0.72, "learning_rate": 4.64116652578191e-05, "loss": 0.3858, "step": 849, "task_loss": 0.7615939378738403 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23451358079910278, "epoch": 0.72, "learning_rate": 4.640743871513102e-05, "loss": 0.3009, "step": 850, "task_loss": 1.2422040700912476 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.361864298582077, "epoch": 0.72, "learning_rate": 4.640321217244295e-05, "loss": 0.4675, "step": 851, "task_loss": 0.8000878095626831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20752942562103271, "epoch": 0.72, "learning_rate": 4.639898562975487e-05, "loss": 0.2776, "step": 852, "task_loss": 0.3167298138141632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.371992290019989, "epoch": 0.72, "learning_rate": 4.639475908706678e-05, "loss": 0.3352, "step": 853, "task_loss": 0.3330743908882141 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35249629616737366, "epoch": 0.72, "learning_rate": 4.63905325443787e-05, "loss": 0.2617, "step": 854, "task_loss": 0.25995922088623047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46947115659713745, "epoch": 0.72, "learning_rate": 4.638630600169062e-05, "loss": 0.4313, "step": 855, "task_loss": 0.8979749083518982 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3745986819267273, "epoch": 0.72, "learning_rate": 4.638207945900254e-05, "loss": 0.2747, "step": 856, "task_loss": 0.10766928642988205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32626044750213623, "epoch": 0.72, "learning_rate": 4.637785291631446e-05, "loss": 0.4161, "step": 857, "task_loss": 0.4534900188446045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33935442566871643, "epoch": 0.72, "learning_rate": 4.637362637362638e-05, "loss": 0.3865, "step": 858, "task_loss": 1.0689483880996704 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3594878017902374, "epoch": 0.73, "learning_rate": 4.636939983093829e-05, "loss": 0.2921, "step": 859, "task_loss": 1.1598310470581055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2799212634563446, "epoch": 0.73, "learning_rate": 4.636517328825021e-05, "loss": 0.3019, "step": 860, "task_loss": 0.27010488510131836 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41094690561294556, "epoch": 0.73, "learning_rate": 4.636094674556213e-05, "loss": 0.3601, "step": 861, "task_loss": 0.22743143141269684 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2764747440814972, "epoch": 0.73, "learning_rate": 4.635672020287405e-05, "loss": 0.3251, "step": 862, "task_loss": 0.6687231063842773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.47841474413871765, "epoch": 0.73, "learning_rate": 4.635249366018597e-05, "loss": 0.2957, "step": 863, "task_loss": 0.2392089068889618 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2663487493991852, "epoch": 0.73, "learning_rate": 4.634826711749789e-05, "loss": 0.2701, "step": 864, "task_loss": 0.3156798481941223 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3532913625240326, "epoch": 0.73, "learning_rate": 4.634404057480981e-05, "loss": 0.3206, "step": 865, "task_loss": 0.9060441255569458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24406850337982178, "epoch": 0.73, "learning_rate": 4.6339814032121724e-05, "loss": 0.3477, "step": 866, "task_loss": 0.3608352541923523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2158551812171936, "epoch": 0.73, "learning_rate": 4.633558748943364e-05, "loss": 0.28, "step": 867, "task_loss": 1.2199188470840454 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22323551774024963, "epoch": 0.73, "learning_rate": 4.633136094674557e-05, "loss": 0.3782, "step": 868, "task_loss": 0.14097052812576294 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30778181552886963, "epoch": 0.73, "learning_rate": 4.632713440405748e-05, "loss": 0.3384, "step": 869, "task_loss": 0.597816526889801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2497592568397522, "epoch": 0.73, "learning_rate": 4.63229078613694e-05, "loss": 0.3245, "step": 870, "task_loss": 0.6203656196594238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2544447183609009, "epoch": 0.74, "learning_rate": 4.631868131868132e-05, "loss": 0.2809, "step": 871, "task_loss": 0.9502989053726196 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33307886123657227, "epoch": 0.74, "learning_rate": 4.6314454775993235e-05, "loss": 0.361, "step": 872, "task_loss": 0.46259069442749023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21646437048912048, "epoch": 0.74, "learning_rate": 4.631022823330516e-05, "loss": 0.236, "step": 873, "task_loss": 0.5346273183822632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1888790726661682, "epoch": 0.74, "learning_rate": 4.630600169061708e-05, "loss": 0.3321, "step": 874, "task_loss": 0.37077441811561584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24533206224441528, "epoch": 0.74, "learning_rate": 4.6301775147928994e-05, "loss": 0.3313, "step": 875, "task_loss": 0.47546735405921936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36210837960243225, "epoch": 0.74, "learning_rate": 4.6297548605240914e-05, "loss": 0.3366, "step": 876, "task_loss": 0.5125508308410645 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27049046754837036, "epoch": 0.74, "learning_rate": 4.6293322062552834e-05, "loss": 0.2747, "step": 877, "task_loss": 0.748110830783844 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3963201642036438, "epoch": 0.74, "learning_rate": 4.6289095519864754e-05, "loss": 0.3637, "step": 878, "task_loss": 0.707857072353363 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4828207194805145, "epoch": 0.74, "learning_rate": 4.628486897717667e-05, "loss": 0.3784, "step": 879, "task_loss": 0.6764346957206726 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5259439945220947, "epoch": 0.74, "learning_rate": 4.628064243448859e-05, "loss": 0.3544, "step": 880, "task_loss": 0.8510361909866333 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2657608687877655, "epoch": 0.74, "learning_rate": 4.627641589180051e-05, "loss": 0.2309, "step": 881, "task_loss": 0.23221217095851898 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21574905514717102, "epoch": 0.75, "learning_rate": 4.6272189349112426e-05, "loss": 0.3528, "step": 882, "task_loss": 0.24643269181251526 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2436504065990448, "epoch": 0.75, "learning_rate": 4.6267962806424346e-05, "loss": 0.3622, "step": 883, "task_loss": 0.7056463360786438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32132038474082947, "epoch": 0.75, "learning_rate": 4.6263736263736265e-05, "loss": 0.3267, "step": 884, "task_loss": 0.1795760542154312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2810857594013214, "epoch": 0.75, "learning_rate": 4.6259509721048185e-05, "loss": 0.3109, "step": 885, "task_loss": 1.6597498655319214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4146636724472046, "epoch": 0.75, "learning_rate": 4.6255283178360105e-05, "loss": 0.309, "step": 886, "task_loss": 0.8720802664756775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3879651129245758, "epoch": 0.75, "learning_rate": 4.6251056635672024e-05, "loss": 0.328, "step": 887, "task_loss": 0.4393438398838043 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38941875100135803, "epoch": 0.75, "learning_rate": 4.624683009298394e-05, "loss": 0.3296, "step": 888, "task_loss": 0.7270287871360779 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28111058473587036, "epoch": 0.75, "learning_rate": 4.624260355029586e-05, "loss": 0.2789, "step": 889, "task_loss": 0.5853151082992554 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14975634217262268, "epoch": 0.75, "learning_rate": 4.6238377007607784e-05, "loss": 0.2629, "step": 890, "task_loss": 0.4016755521297455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5038657188415527, "epoch": 0.75, "learning_rate": 4.62341504649197e-05, "loss": 0.3411, "step": 891, "task_loss": 1.3833951950073242 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1345922350883484, "epoch": 0.75, "learning_rate": 4.6229923922231616e-05, "loss": 0.2325, "step": 892, "task_loss": 0.5056750178337097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31020522117614746, "epoch": 0.75, "learning_rate": 4.6225697379543536e-05, "loss": 0.4206, "step": 893, "task_loss": 0.39337393641471863 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2663968801498413, "epoch": 0.76, "learning_rate": 4.6221470836855456e-05, "loss": 0.3171, "step": 894, "task_loss": 1.3517099618911743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3160415589809418, "epoch": 0.76, "learning_rate": 4.621724429416737e-05, "loss": 0.3034, "step": 895, "task_loss": 0.10319017618894577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2861885726451874, "epoch": 0.76, "learning_rate": 4.6213017751479295e-05, "loss": 0.4249, "step": 896, "task_loss": 0.3836461305618286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36938178539276123, "epoch": 0.76, "learning_rate": 4.6208791208791215e-05, "loss": 0.3556, "step": 897, "task_loss": 1.2703704833984375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3103448748588562, "epoch": 0.76, "learning_rate": 4.620456466610313e-05, "loss": 0.314, "step": 898, "task_loss": 0.19264638423919678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31108522415161133, "epoch": 0.76, "learning_rate": 4.620033812341505e-05, "loss": 0.3194, "step": 899, "task_loss": 1.2347233295440674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25132980942726135, "epoch": 0.76, "learning_rate": 4.619611158072697e-05, "loss": 0.3871, "step": 900, "task_loss": 0.28908833861351013 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1578858196735382, "epoch": 0.76, "learning_rate": 4.619188503803889e-05, "loss": 0.2732, "step": 901, "task_loss": 0.9690726399421692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5275071859359741, "epoch": 0.76, "learning_rate": 4.618765849535081e-05, "loss": 0.2932, "step": 902, "task_loss": 0.870735764503479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2782747149467468, "epoch": 0.76, "learning_rate": 4.618343195266273e-05, "loss": 0.3723, "step": 903, "task_loss": 0.8498339653015137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5097308158874512, "epoch": 0.76, "learning_rate": 4.617920540997464e-05, "loss": 0.3943, "step": 904, "task_loss": 1.1436095237731934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43819373846054077, "epoch": 0.76, "learning_rate": 4.617497886728656e-05, "loss": 0.319, "step": 905, "task_loss": 0.8787400126457214 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19291357696056366, "epoch": 0.77, "learning_rate": 4.617075232459848e-05, "loss": 0.3443, "step": 906, "task_loss": 0.9966549277305603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4310792088508606, "epoch": 0.77, "learning_rate": 4.61665257819104e-05, "loss": 0.4374, "step": 907, "task_loss": 0.5687097907066345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3214266300201416, "epoch": 0.77, "learning_rate": 4.616229923922232e-05, "loss": 0.3351, "step": 908, "task_loss": 1.3388911485671997 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38909125328063965, "epoch": 0.77, "learning_rate": 4.615807269653424e-05, "loss": 0.4006, "step": 909, "task_loss": 0.4524170160293579 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22019779682159424, "epoch": 0.77, "learning_rate": 4.615384615384616e-05, "loss": 0.3276, "step": 910, "task_loss": 0.6273325681686401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33304253220558167, "epoch": 0.77, "learning_rate": 4.614961961115807e-05, "loss": 0.347, "step": 911, "task_loss": 0.2653331160545349 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3374251425266266, "epoch": 0.77, "learning_rate": 4.614539306846999e-05, "loss": 0.3965, "step": 912, "task_loss": 0.6541954874992371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26544278860092163, "epoch": 0.77, "learning_rate": 4.614116652578192e-05, "loss": 0.2837, "step": 913, "task_loss": 1.2303000688552856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2579417824745178, "epoch": 0.77, "learning_rate": 4.613693998309383e-05, "loss": 0.3092, "step": 914, "task_loss": 1.1536496877670288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26663273572921753, "epoch": 0.77, "learning_rate": 4.613271344040575e-05, "loss": 0.2463, "step": 915, "task_loss": 0.7196165323257446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28072217106819153, "epoch": 0.77, "learning_rate": 4.612848689771767e-05, "loss": 0.2591, "step": 916, "task_loss": 0.5546468496322632 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23951895534992218, "epoch": 0.77, "learning_rate": 4.612426035502958e-05, "loss": 0.3292, "step": 917, "task_loss": 1.281814694404602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.432782918214798, "epoch": 0.78, "learning_rate": 4.612003381234151e-05, "loss": 0.3524, "step": 918, "task_loss": 0.5389975309371948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2952663004398346, "epoch": 0.78, "learning_rate": 4.611580726965343e-05, "loss": 0.317, "step": 919, "task_loss": 0.9771247506141663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46760207414627075, "epoch": 0.78, "learning_rate": 4.611158072696534e-05, "loss": 0.2948, "step": 920, "task_loss": 1.008791208267212 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21138301491737366, "epoch": 0.78, "learning_rate": 4.610735418427726e-05, "loss": 0.275, "step": 921, "task_loss": 0.5766026377677917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3450990915298462, "epoch": 0.78, "learning_rate": 4.610312764158918e-05, "loss": 0.336, "step": 922, "task_loss": 0.39770257472991943 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24819274246692657, "epoch": 0.78, "learning_rate": 4.60989010989011e-05, "loss": 0.3951, "step": 923, "task_loss": 1.1994588375091553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26219120621681213, "epoch": 0.78, "learning_rate": 4.609467455621302e-05, "loss": 0.3992, "step": 924, "task_loss": 1.3631459474563599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39332079887390137, "epoch": 0.78, "learning_rate": 4.609044801352494e-05, "loss": 0.4493, "step": 925, "task_loss": 1.2999727725982666 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2386210858821869, "epoch": 0.78, "learning_rate": 4.608622147083686e-05, "loss": 0.27, "step": 926, "task_loss": 0.5237141251564026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31889891624450684, "epoch": 0.78, "learning_rate": 4.608199492814877e-05, "loss": 0.2055, "step": 927, "task_loss": 0.12464319914579391 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29726114869117737, "epoch": 0.78, "learning_rate": 4.607776838546069e-05, "loss": 0.3837, "step": 928, "task_loss": 0.2828030288219452 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5032831430435181, "epoch": 0.78, "learning_rate": 4.607354184277261e-05, "loss": 0.3447, "step": 929, "task_loss": 0.8295583128929138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31472247838974, "epoch": 0.79, "learning_rate": 4.606931530008453e-05, "loss": 0.3694, "step": 930, "task_loss": 1.5383825302124023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27644604444503784, "epoch": 0.79, "learning_rate": 4.606508875739645e-05, "loss": 0.3165, "step": 931, "task_loss": 0.7559335231781006 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18283003568649292, "epoch": 0.79, "learning_rate": 4.606086221470837e-05, "loss": 0.1992, "step": 932, "task_loss": 0.144075408577919 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4631301164627075, "epoch": 0.79, "learning_rate": 4.6056635672020285e-05, "loss": 0.3971, "step": 933, "task_loss": 1.6281708478927612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42738059163093567, "epoch": 0.79, "learning_rate": 4.6052409129332205e-05, "loss": 0.3377, "step": 934, "task_loss": 0.6741576790809631 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3566298186779022, "epoch": 0.79, "learning_rate": 4.604818258664413e-05, "loss": 0.3676, "step": 935, "task_loss": 0.27528154850006104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.330236554145813, "epoch": 0.79, "learning_rate": 4.6043956043956044e-05, "loss": 0.3648, "step": 936, "task_loss": 0.20326869189739227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1568087637424469, "epoch": 0.79, "learning_rate": 4.6039729501267964e-05, "loss": 0.3553, "step": 937, "task_loss": 0.3123857080936432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5085207223892212, "epoch": 0.79, "learning_rate": 4.6035502958579884e-05, "loss": 0.3909, "step": 938, "task_loss": 0.6514917016029358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.154745414853096, "epoch": 0.79, "learning_rate": 4.6031276415891803e-05, "loss": 0.3043, "step": 939, "task_loss": 0.2217102348804474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26345983147621155, "epoch": 0.79, "learning_rate": 4.602704987320372e-05, "loss": 0.3337, "step": 940, "task_loss": 1.1459896564483643 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32549530267715454, "epoch": 0.79, "learning_rate": 4.602282333051564e-05, "loss": 0.2628, "step": 941, "task_loss": 0.7183759212493896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24363887310028076, "epoch": 0.8, "learning_rate": 4.601859678782756e-05, "loss": 0.322, "step": 942, "task_loss": 0.33217161893844604 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4149688482284546, "epoch": 0.8, "learning_rate": 4.6014370245139476e-05, "loss": 0.307, "step": 943, "task_loss": 0.26546531915664673 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48364436626434326, "epoch": 0.8, "learning_rate": 4.6010143702451395e-05, "loss": 0.3325, "step": 944, "task_loss": 0.4470929503440857 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22630174458026886, "epoch": 0.8, "learning_rate": 4.6005917159763315e-05, "loss": 0.3402, "step": 945, "task_loss": 0.6232134699821472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17915762960910797, "epoch": 0.8, "learning_rate": 4.6001690617075235e-05, "loss": 0.2294, "step": 946, "task_loss": 0.3601345717906952 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28538429737091064, "epoch": 0.8, "learning_rate": 4.5997464074387155e-05, "loss": 0.2676, "step": 947, "task_loss": 0.43201231956481934 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27372103929519653, "epoch": 0.8, "learning_rate": 4.5993237531699074e-05, "loss": 0.2827, "step": 948, "task_loss": 1.5435055494308472 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2887795567512512, "epoch": 0.8, "learning_rate": 4.598901098901099e-05, "loss": 0.3111, "step": 949, "task_loss": 0.3925054967403412 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2809564471244812, "epoch": 0.8, "learning_rate": 4.598478444632291e-05, "loss": 0.2503, "step": 950, "task_loss": 0.7698982357978821 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25734391808509827, "epoch": 0.8, "learning_rate": 4.598055790363483e-05, "loss": 0.2912, "step": 951, "task_loss": 0.41852739453315735 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.395840048789978, "epoch": 0.8, "learning_rate": 4.597633136094675e-05, "loss": 0.3705, "step": 952, "task_loss": 0.505364716053009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16447016596794128, "epoch": 0.81, "learning_rate": 4.5972104818258666e-05, "loss": 0.3264, "step": 953, "task_loss": 0.39294782280921936 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3487350046634674, "epoch": 0.81, "learning_rate": 4.5967878275570586e-05, "loss": 0.32, "step": 954, "task_loss": 1.0585432052612305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2846418023109436, "epoch": 0.81, "learning_rate": 4.5963651732882506e-05, "loss": 0.3766, "step": 955, "task_loss": 0.32126832008361816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.123996302485466, "epoch": 0.81, "learning_rate": 4.595942519019442e-05, "loss": 0.2981, "step": 956, "task_loss": 0.019682593643665314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2558656632900238, "epoch": 0.81, "learning_rate": 4.5955198647506345e-05, "loss": 0.2799, "step": 957, "task_loss": 0.8895480036735535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.48175427317619324, "epoch": 0.81, "learning_rate": 4.5950972104818265e-05, "loss": 0.4012, "step": 958, "task_loss": 0.6715694069862366 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2468600869178772, "epoch": 0.81, "learning_rate": 4.594674556213018e-05, "loss": 0.3789, "step": 959, "task_loss": 0.40784138441085815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28495824337005615, "epoch": 0.81, "learning_rate": 4.59425190194421e-05, "loss": 0.3826, "step": 960, "task_loss": 0.3370976746082306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2992165684700012, "epoch": 0.81, "learning_rate": 4.593829247675402e-05, "loss": 0.315, "step": 961, "task_loss": 0.10349813103675842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39707690477371216, "epoch": 0.81, "learning_rate": 4.593406593406594e-05, "loss": 0.3413, "step": 962, "task_loss": 0.5664233565330505 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2814026474952698, "epoch": 0.81, "learning_rate": 4.592983939137786e-05, "loss": 0.3904, "step": 963, "task_loss": 1.0049792528152466 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19465628266334534, "epoch": 0.81, "learning_rate": 4.5925612848689777e-05, "loss": 0.2879, "step": 964, "task_loss": 0.07359443604946136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3513874411582947, "epoch": 0.82, "learning_rate": 4.592138630600169e-05, "loss": 0.3549, "step": 965, "task_loss": 0.5460289716720581 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33203014731407166, "epoch": 0.82, "learning_rate": 4.591715976331361e-05, "loss": 0.3409, "step": 966, "task_loss": 1.481011986732483 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27470993995666504, "epoch": 0.82, "learning_rate": 4.591293322062553e-05, "loss": 0.3305, "step": 967, "task_loss": 0.4593425393104553 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.11648278683423996, "epoch": 0.82, "learning_rate": 4.590870667793745e-05, "loss": 0.321, "step": 968, "task_loss": 0.3774500787258148 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.353023886680603, "epoch": 0.82, "learning_rate": 4.590448013524937e-05, "loss": 0.3351, "step": 969, "task_loss": 1.1915583610534668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2356659471988678, "epoch": 0.82, "learning_rate": 4.590025359256129e-05, "loss": 0.2714, "step": 970, "task_loss": 0.4875516891479492 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37664133310317993, "epoch": 0.82, "learning_rate": 4.589602704987321e-05, "loss": 0.3707, "step": 971, "task_loss": 0.6027059555053711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2550051212310791, "epoch": 0.82, "learning_rate": 4.589180050718512e-05, "loss": 0.269, "step": 972, "task_loss": 0.33342424035072327 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4495178461074829, "epoch": 0.82, "learning_rate": 4.588757396449704e-05, "loss": 0.3211, "step": 973, "task_loss": 0.6046321392059326 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21325893700122833, "epoch": 0.82, "learning_rate": 4.588334742180897e-05, "loss": 0.3904, "step": 974, "task_loss": 0.1933622509241104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2678370177745819, "epoch": 0.82, "learning_rate": 4.587912087912088e-05, "loss": 0.2613, "step": 975, "task_loss": 0.39825576543807983 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18920046091079712, "epoch": 0.82, "learning_rate": 4.58748943364328e-05, "loss": 0.4215, "step": 976, "task_loss": 0.5402249097824097 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23081913590431213, "epoch": 0.83, "learning_rate": 4.587066779374472e-05, "loss": 0.3163, "step": 977, "task_loss": 1.4790289402008057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21092328429222107, "epoch": 0.83, "learning_rate": 4.586644125105663e-05, "loss": 0.2447, "step": 978, "task_loss": 0.6820929050445557 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.30764931440353394, "epoch": 0.83, "learning_rate": 4.586221470836856e-05, "loss": 0.3149, "step": 979, "task_loss": 0.20430991053581238 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2513306140899658, "epoch": 0.83, "learning_rate": 4.585798816568048e-05, "loss": 0.3218, "step": 980, "task_loss": 1.1656993627548218 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2478112280368805, "epoch": 0.83, "learning_rate": 4.58537616229924e-05, "loss": 0.3601, "step": 981, "task_loss": 1.4366612434387207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4809786081314087, "epoch": 0.83, "learning_rate": 4.584953508030431e-05, "loss": 0.3599, "step": 982, "task_loss": 0.32178795337677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3879166841506958, "epoch": 0.83, "learning_rate": 4.584530853761623e-05, "loss": 0.3039, "step": 983, "task_loss": 0.35262659192085266 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20825481414794922, "epoch": 0.83, "learning_rate": 4.584108199492815e-05, "loss": 0.3586, "step": 984, "task_loss": 0.8200913071632385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5232114791870117, "epoch": 0.83, "learning_rate": 4.583685545224007e-05, "loss": 0.3648, "step": 985, "task_loss": 1.6637427806854248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35127195715904236, "epoch": 0.83, "learning_rate": 4.583262890955199e-05, "loss": 0.3242, "step": 986, "task_loss": 1.1167726516723633 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3223801255226135, "epoch": 0.83, "learning_rate": 4.582840236686391e-05, "loss": 0.3698, "step": 987, "task_loss": 1.1016654968261719 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26718980073928833, "epoch": 0.83, "learning_rate": 4.582417582417582e-05, "loss": 0.306, "step": 988, "task_loss": 0.582869827747345 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.285981684923172, "epoch": 0.84, "learning_rate": 4.581994928148774e-05, "loss": 0.3058, "step": 989, "task_loss": 0.9653950333595276 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3817080855369568, "epoch": 0.84, "learning_rate": 4.581572273879966e-05, "loss": 0.2993, "step": 990, "task_loss": 0.8561366200447083 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22112765908241272, "epoch": 0.84, "learning_rate": 4.581149619611158e-05, "loss": 0.2875, "step": 991, "task_loss": 0.6691679954528809 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2103605717420578, "epoch": 0.84, "learning_rate": 4.58072696534235e-05, "loss": 0.2907, "step": 992, "task_loss": 1.021261215209961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39024853706359863, "epoch": 0.84, "learning_rate": 4.580304311073542e-05, "loss": 0.3313, "step": 993, "task_loss": 0.17300234735012054 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4057610034942627, "epoch": 0.84, "learning_rate": 4.5798816568047335e-05, "loss": 0.334, "step": 994, "task_loss": 1.045444369316101 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37642043828964233, "epoch": 0.84, "learning_rate": 4.5794590025359255e-05, "loss": 0.3436, "step": 995, "task_loss": 0.3469464182853699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3785593509674072, "epoch": 0.84, "learning_rate": 4.579036348267118e-05, "loss": 0.3412, "step": 996, "task_loss": 0.40450188517570496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3710609972476959, "epoch": 0.84, "learning_rate": 4.57861369399831e-05, "loss": 0.3786, "step": 997, "task_loss": 1.002790927886963 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21029534935951233, "epoch": 0.84, "learning_rate": 4.5781910397295014e-05, "loss": 0.3131, "step": 998, "task_loss": 1.4204806089401245 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2556370496749878, "epoch": 0.84, "learning_rate": 4.5777683854606934e-05, "loss": 0.4118, "step": 999, "task_loss": 1.1292375326156616 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3558931350708008, "epoch": 0.84, "learning_rate": 4.577345731191885e-05, "loss": 0.2613, "step": 1000, "task_loss": 0.49822065234184265 }, { "epoch": 0.84, "eval_accuracy": 0.913940594059406, "eval_loss": 0.18967993557453156, "eval_runtime": 338.0531, "eval_samples_per_second": 74.692, "eval_steps_per_second": 0.586, "step": 1000 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23912113904953003, "epoch": 0.85, "learning_rate": 4.576923076923077e-05, "loss": 0.3496, "step": 1001, "task_loss": 0.3939993381500244 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2952667474746704, "epoch": 0.85, "learning_rate": 4.576500422654269e-05, "loss": 0.3001, "step": 1002, "task_loss": 0.44996291399002075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14472924172878265, "epoch": 0.85, "learning_rate": 4.576077768385461e-05, "loss": 0.3498, "step": 1003, "task_loss": 0.209279865026474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31769293546676636, "epoch": 0.85, "learning_rate": 4.5756551141166525e-05, "loss": 0.3995, "step": 1004, "task_loss": 0.5328032374382019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8548835515975952, "epoch": 0.85, "learning_rate": 4.5752324598478445e-05, "loss": 0.4194, "step": 1005, "task_loss": 1.5901955366134644 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5097059011459351, "epoch": 0.85, "learning_rate": 4.5748098055790365e-05, "loss": 0.4045, "step": 1006, "task_loss": 0.09073382616043091 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17884889245033264, "epoch": 0.85, "learning_rate": 4.5743871513102285e-05, "loss": 0.32, "step": 1007, "task_loss": 0.22524794936180115 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2864243686199188, "epoch": 0.85, "learning_rate": 4.5739644970414204e-05, "loss": 0.35, "step": 1008, "task_loss": 0.5916472673416138 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39622369408607483, "epoch": 0.85, "learning_rate": 4.5735418427726124e-05, "loss": 0.3036, "step": 1009, "task_loss": 1.0695246458053589 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3557763397693634, "epoch": 0.85, "learning_rate": 4.573119188503804e-05, "loss": 0.4268, "step": 1010, "task_loss": 1.484282374382019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.43481993675231934, "epoch": 0.85, "learning_rate": 4.572696534234996e-05, "loss": 0.3944, "step": 1011, "task_loss": 0.8948748111724854 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14724287390708923, "epoch": 0.85, "learning_rate": 4.572273879966188e-05, "loss": 0.2871, "step": 1012, "task_loss": 0.2009107768535614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14038264751434326, "epoch": 0.86, "learning_rate": 4.57185122569738e-05, "loss": 0.2934, "step": 1013, "task_loss": 0.16116684675216675 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26486438512802124, "epoch": 0.86, "learning_rate": 4.5714285714285716e-05, "loss": 0.3377, "step": 1014, "task_loss": 0.20203635096549988 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14152994751930237, "epoch": 0.86, "learning_rate": 4.5710059171597636e-05, "loss": 0.2721, "step": 1015, "task_loss": 0.9633371233940125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26985055208206177, "epoch": 0.86, "learning_rate": 4.5705832628909556e-05, "loss": 0.3403, "step": 1016, "task_loss": 0.47459304332733154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15847118198871613, "epoch": 0.86, "learning_rate": 4.570160608622147e-05, "loss": 0.3049, "step": 1017, "task_loss": 0.11324185878038406 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36625000834465027, "epoch": 0.86, "learning_rate": 4.5697379543533395e-05, "loss": 0.3174, "step": 1018, "task_loss": 0.8263206481933594 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42509859800338745, "epoch": 0.86, "learning_rate": 4.5693153000845315e-05, "loss": 0.4844, "step": 1019, "task_loss": 0.5466962456703186 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25256964564323425, "epoch": 0.86, "learning_rate": 4.568892645815723e-05, "loss": 0.2754, "step": 1020, "task_loss": 0.9385986924171448 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.13900268077850342, "epoch": 0.86, "learning_rate": 4.568469991546915e-05, "loss": 0.4031, "step": 1021, "task_loss": 0.08166217803955078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14120763540267944, "epoch": 0.86, "learning_rate": 4.568047337278107e-05, "loss": 0.2618, "step": 1022, "task_loss": 0.01267182920128107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26838311553001404, "epoch": 0.86, "learning_rate": 4.567624683009298e-05, "loss": 0.2616, "step": 1023, "task_loss": 1.0019125938415527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1619986593723297, "epoch": 0.87, "learning_rate": 4.567202028740491e-05, "loss": 0.2586, "step": 1024, "task_loss": 0.11912364512681961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26556265354156494, "epoch": 0.87, "learning_rate": 4.5667793744716826e-05, "loss": 0.3354, "step": 1025, "task_loss": 0.5522617697715759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.45204195380210876, "epoch": 0.87, "learning_rate": 4.5663567202028746e-05, "loss": 0.3513, "step": 1026, "task_loss": 0.2980015277862549 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40704768896102905, "epoch": 0.87, "learning_rate": 4.565934065934066e-05, "loss": 0.3226, "step": 1027, "task_loss": 0.3302595317363739 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28598201274871826, "epoch": 0.87, "learning_rate": 4.565511411665258e-05, "loss": 0.3815, "step": 1028, "task_loss": 0.76263827085495 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20379169285297394, "epoch": 0.87, "learning_rate": 4.56508875739645e-05, "loss": 0.3025, "step": 1029, "task_loss": 0.5890194773674011 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4659903645515442, "epoch": 0.87, "learning_rate": 4.564666103127642e-05, "loss": 0.4342, "step": 1030, "task_loss": 0.9342086911201477 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33894455432891846, "epoch": 0.87, "learning_rate": 4.564243448858834e-05, "loss": 0.3395, "step": 1031, "task_loss": 0.31865039467811584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34021997451782227, "epoch": 0.87, "learning_rate": 4.563820794590026e-05, "loss": 0.3123, "step": 1032, "task_loss": 0.14327658712863922 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21941176056861877, "epoch": 0.87, "learning_rate": 4.563398140321217e-05, "loss": 0.4326, "step": 1033, "task_loss": 0.0961829125881195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3016534447669983, "epoch": 0.87, "learning_rate": 4.562975486052409e-05, "loss": 0.3609, "step": 1034, "task_loss": 0.5202248096466064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19246357679367065, "epoch": 0.87, "learning_rate": 4.562552831783602e-05, "loss": 0.2473, "step": 1035, "task_loss": 0.512968122959137 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.31773266196250916, "epoch": 0.88, "learning_rate": 4.562130177514793e-05, "loss": 0.3702, "step": 1036, "task_loss": 0.3712160587310791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3228875994682312, "epoch": 0.88, "learning_rate": 4.561707523245985e-05, "loss": 0.502, "step": 1037, "task_loss": 1.380858063697815 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18843914568424225, "epoch": 0.88, "learning_rate": 4.561284868977177e-05, "loss": 0.2984, "step": 1038, "task_loss": 0.1643063724040985 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23467159271240234, "epoch": 0.88, "learning_rate": 4.560862214708368e-05, "loss": 0.3617, "step": 1039, "task_loss": 0.5294719338417053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2105211764574051, "epoch": 0.88, "learning_rate": 4.56043956043956e-05, "loss": 0.4081, "step": 1040, "task_loss": 0.878465473651886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28716641664505005, "epoch": 0.88, "learning_rate": 4.560016906170753e-05, "loss": 0.3827, "step": 1041, "task_loss": 0.3960720896720886 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3224141299724579, "epoch": 0.88, "learning_rate": 4.559594251901945e-05, "loss": 0.2828, "step": 1042, "task_loss": 0.8254625797271729 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2405867874622345, "epoch": 0.88, "learning_rate": 4.559171597633136e-05, "loss": 0.2642, "step": 1043, "task_loss": 0.47411441802978516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23462903499603271, "epoch": 0.88, "learning_rate": 4.558748943364328e-05, "loss": 0.2545, "step": 1044, "task_loss": 0.5556021928787231 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20667308568954468, "epoch": 0.88, "learning_rate": 4.55832628909552e-05, "loss": 0.3163, "step": 1045, "task_loss": 0.7395960092544556 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4931938052177429, "epoch": 0.88, "learning_rate": 4.557903634826712e-05, "loss": 0.3809, "step": 1046, "task_loss": 0.9698067903518677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40067774057388306, "epoch": 0.88, "learning_rate": 4.557480980557904e-05, "loss": 0.4271, "step": 1047, "task_loss": 0.1059439554810524 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15574510395526886, "epoch": 0.89, "learning_rate": 4.557058326289096e-05, "loss": 0.416, "step": 1048, "task_loss": 0.5026818513870239 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26975488662719727, "epoch": 0.89, "learning_rate": 4.556635672020287e-05, "loss": 0.4143, "step": 1049, "task_loss": 0.6446378231048584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3566000759601593, "epoch": 0.89, "learning_rate": 4.556213017751479e-05, "loss": 0.3302, "step": 1050, "task_loss": 0.7665790915489197 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21607786417007446, "epoch": 0.89, "learning_rate": 4.555790363482671e-05, "loss": 0.3492, "step": 1051, "task_loss": 0.5244699120521545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22081968188285828, "epoch": 0.89, "learning_rate": 4.555367709213863e-05, "loss": 0.292, "step": 1052, "task_loss": 0.11215706169605255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.46807587146759033, "epoch": 0.89, "learning_rate": 4.554945054945055e-05, "loss": 0.3871, "step": 1053, "task_loss": 1.020930290222168 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.42110878229141235, "epoch": 0.89, "learning_rate": 4.554522400676247e-05, "loss": 0.3234, "step": 1054, "task_loss": 0.9853991866111755 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15152102708816528, "epoch": 0.89, "learning_rate": 4.554099746407439e-05, "loss": 0.2772, "step": 1055, "task_loss": 0.10596860200166702 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34711506962776184, "epoch": 0.89, "learning_rate": 4.5536770921386304e-05, "loss": 0.3232, "step": 1056, "task_loss": 0.764802873134613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.8313543200492859, "epoch": 0.89, "learning_rate": 4.5532544378698224e-05, "loss": 0.4054, "step": 1057, "task_loss": 0.738728940486908 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2969970703125, "epoch": 0.89, "learning_rate": 4.552831783601015e-05, "loss": 0.3304, "step": 1058, "task_loss": 1.0873650312423706 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1679776906967163, "epoch": 0.89, "learning_rate": 4.5524091293322064e-05, "loss": 0.335, "step": 1059, "task_loss": 0.09169580042362213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25704994797706604, "epoch": 0.9, "learning_rate": 4.5519864750633983e-05, "loss": 0.4206, "step": 1060, "task_loss": 1.3041671514511108 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3977009057998657, "epoch": 0.9, "learning_rate": 4.55156382079459e-05, "loss": 0.3533, "step": 1061, "task_loss": 0.6762240529060364 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2623427212238312, "epoch": 0.9, "learning_rate": 4.5511411665257816e-05, "loss": 0.3511, "step": 1062, "task_loss": 0.6453579068183899 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.40080171823501587, "epoch": 0.9, "learning_rate": 4.550718512256974e-05, "loss": 0.3013, "step": 1063, "task_loss": 0.42494773864746094 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4466301202774048, "epoch": 0.9, "learning_rate": 4.550295857988166e-05, "loss": 0.3897, "step": 1064, "task_loss": 1.061437726020813 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3910818099975586, "epoch": 0.9, "learning_rate": 4.5498732037193575e-05, "loss": 0.3597, "step": 1065, "task_loss": 0.7140308618545532 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6939448714256287, "epoch": 0.9, "learning_rate": 4.5494505494505495e-05, "loss": 0.5485, "step": 1066, "task_loss": 0.2731471061706543 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35204967856407166, "epoch": 0.9, "learning_rate": 4.5490278951817415e-05, "loss": 0.3037, "step": 1067, "task_loss": 0.3471311628818512 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41570937633514404, "epoch": 0.9, "learning_rate": 4.5486052409129335e-05, "loss": 0.3338, "step": 1068, "task_loss": 0.9508390426635742 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4666168689727783, "epoch": 0.9, "learning_rate": 4.5481825866441254e-05, "loss": 0.3404, "step": 1069, "task_loss": 0.6101741790771484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21288862824440002, "epoch": 0.9, "learning_rate": 4.5477599323753174e-05, "loss": 0.3286, "step": 1070, "task_loss": 0.5916988849639893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24179798364639282, "epoch": 0.9, "learning_rate": 4.5473372781065094e-05, "loss": 0.2858, "step": 1071, "task_loss": 0.35410770773887634 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.534841775894165, "epoch": 0.91, "learning_rate": 4.546914623837701e-05, "loss": 0.399, "step": 1072, "task_loss": 1.1390701532363892 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2865257263183594, "epoch": 0.91, "learning_rate": 4.5464919695688926e-05, "loss": 0.2767, "step": 1073, "task_loss": 0.3366599977016449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6614929437637329, "epoch": 0.91, "learning_rate": 4.5460693153000846e-05, "loss": 0.3562, "step": 1074, "task_loss": 0.44420671463012695 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27515363693237305, "epoch": 0.91, "learning_rate": 4.5456466610312766e-05, "loss": 0.2986, "step": 1075, "task_loss": 0.4002901315689087 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3757680654525757, "epoch": 0.91, "learning_rate": 4.5452240067624686e-05, "loss": 0.4197, "step": 1076, "task_loss": 0.3707464337348938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19199183583259583, "epoch": 0.91, "learning_rate": 4.5448013524936605e-05, "loss": 0.304, "step": 1077, "task_loss": 0.9447035193443298 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15268227458000183, "epoch": 0.91, "learning_rate": 4.544378698224852e-05, "loss": 0.2282, "step": 1078, "task_loss": 0.5656244158744812 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2276354283094406, "epoch": 0.91, "learning_rate": 4.543956043956044e-05, "loss": 0.2688, "step": 1079, "task_loss": 0.3784196376800537 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2888668179512024, "epoch": 0.91, "learning_rate": 4.5435333896872365e-05, "loss": 0.3501, "step": 1080, "task_loss": 0.6107114553451538 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5053978562355042, "epoch": 0.91, "learning_rate": 4.543110735418428e-05, "loss": 0.4985, "step": 1081, "task_loss": 1.2131242752075195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3194124698638916, "epoch": 0.91, "learning_rate": 4.54268808114962e-05, "loss": 0.306, "step": 1082, "task_loss": 0.8277527093887329 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18914735317230225, "epoch": 0.91, "learning_rate": 4.542265426880812e-05, "loss": 0.2814, "step": 1083, "task_loss": 0.40239810943603516 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1989613026380539, "epoch": 0.92, "learning_rate": 4.541842772612004e-05, "loss": 0.3718, "step": 1084, "task_loss": 0.23209945857524872 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2026151567697525, "epoch": 0.92, "learning_rate": 4.5414201183431957e-05, "loss": 0.286, "step": 1085, "task_loss": 0.6754297614097595 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2859596312046051, "epoch": 0.92, "learning_rate": 4.5409974640743876e-05, "loss": 0.3311, "step": 1086, "task_loss": 0.773571252822876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.34478265047073364, "epoch": 0.92, "learning_rate": 4.5405748098055796e-05, "loss": 0.3192, "step": 1087, "task_loss": 0.3566049337387085 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23188981413841248, "epoch": 0.92, "learning_rate": 4.540152155536771e-05, "loss": 0.3517, "step": 1088, "task_loss": 0.2745937705039978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.350466787815094, "epoch": 0.92, "learning_rate": 4.539729501267963e-05, "loss": 0.3144, "step": 1089, "task_loss": 0.44054114818573 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4395449757575989, "epoch": 0.92, "learning_rate": 4.539306846999155e-05, "loss": 0.3265, "step": 1090, "task_loss": 0.77244633436203 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17624793946743011, "epoch": 0.92, "learning_rate": 4.538884192730347e-05, "loss": 0.4105, "step": 1091, "task_loss": 1.2938034534454346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2849665880203247, "epoch": 0.92, "learning_rate": 4.538461538461539e-05, "loss": 0.3222, "step": 1092, "task_loss": 0.6030697822570801 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2724902927875519, "epoch": 0.92, "learning_rate": 4.538038884192731e-05, "loss": 0.3462, "step": 1093, "task_loss": 0.5950093269348145 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6185368895530701, "epoch": 0.92, "learning_rate": 4.537616229923922e-05, "loss": 0.4845, "step": 1094, "task_loss": 0.5496288537979126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3195955455303192, "epoch": 0.93, "learning_rate": 4.537193575655114e-05, "loss": 0.309, "step": 1095, "task_loss": 0.423796147108078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.197946697473526, "epoch": 0.93, "learning_rate": 4.536770921386306e-05, "loss": 0.2794, "step": 1096, "task_loss": 0.1070009246468544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5025796890258789, "epoch": 0.93, "learning_rate": 4.536348267117498e-05, "loss": 0.4463, "step": 1097, "task_loss": 0.750395655632019 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4005841314792633, "epoch": 0.93, "learning_rate": 4.53592561284869e-05, "loss": 0.4582, "step": 1098, "task_loss": 0.8168262243270874 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.41911229491233826, "epoch": 0.93, "learning_rate": 4.535502958579882e-05, "loss": 0.3319, "step": 1099, "task_loss": 0.9958615303039551 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4013597071170807, "epoch": 0.93, "learning_rate": 4.535080304311074e-05, "loss": 0.3226, "step": 1100, "task_loss": 0.6000455021858215 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15728117525577545, "epoch": 0.93, "learning_rate": 4.534657650042265e-05, "loss": 0.3644, "step": 1101, "task_loss": 0.3147355318069458 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2503385543823242, "epoch": 0.93, "learning_rate": 4.534234995773458e-05, "loss": 0.2453, "step": 1102, "task_loss": 0.3066632151603699 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.17960643768310547, "epoch": 0.93, "learning_rate": 4.53381234150465e-05, "loss": 0.2847, "step": 1103, "task_loss": 0.02352246083319187 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4108585715293884, "epoch": 0.93, "learning_rate": 4.533389687235841e-05, "loss": 0.2944, "step": 1104, "task_loss": 0.7071190476417542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2555324137210846, "epoch": 0.93, "learning_rate": 4.532967032967033e-05, "loss": 0.3536, "step": 1105, "task_loss": 0.9082435369491577 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.412306547164917, "epoch": 0.93, "learning_rate": 4.532544378698225e-05, "loss": 0.3878, "step": 1106, "task_loss": 0.6728000044822693 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3466845750808716, "epoch": 0.94, "learning_rate": 4.532121724429417e-05, "loss": 0.3573, "step": 1107, "task_loss": 0.42230331897735596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4577171206474304, "epoch": 0.94, "learning_rate": 4.531699070160609e-05, "loss": 0.2847, "step": 1108, "task_loss": 0.49863535165786743 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22043421864509583, "epoch": 0.94, "learning_rate": 4.531276415891801e-05, "loss": 0.3771, "step": 1109, "task_loss": 0.2066926658153534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24291947484016418, "epoch": 0.94, "learning_rate": 4.530853761622992e-05, "loss": 0.3067, "step": 1110, "task_loss": 0.34979933500289917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3833697736263275, "epoch": 0.94, "learning_rate": 4.530431107354184e-05, "loss": 0.3468, "step": 1111, "task_loss": 0.5778905749320984 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.27140769362449646, "epoch": 0.94, "learning_rate": 4.530008453085376e-05, "loss": 0.3597, "step": 1112, "task_loss": 0.7911396622657776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2681792378425598, "epoch": 0.94, "learning_rate": 4.529585798816568e-05, "loss": 0.4337, "step": 1113, "task_loss": 0.4078145921230316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35279330611228943, "epoch": 0.94, "learning_rate": 4.52916314454776e-05, "loss": 0.3911, "step": 1114, "task_loss": 0.5029667615890503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23784451186656952, "epoch": 0.94, "learning_rate": 4.528740490278952e-05, "loss": 0.311, "step": 1115, "task_loss": 0.06948636472225189 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.25149303674697876, "epoch": 0.94, "learning_rate": 4.528317836010144e-05, "loss": 0.2771, "step": 1116, "task_loss": 0.418526291847229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2822594940662384, "epoch": 0.94, "learning_rate": 4.5278951817413354e-05, "loss": 0.3637, "step": 1117, "task_loss": 0.5836882591247559 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.33597350120544434, "epoch": 0.94, "learning_rate": 4.5274725274725274e-05, "loss": 0.2707, "step": 1118, "task_loss": 0.2719374895095825 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1671154946088791, "epoch": 0.95, "learning_rate": 4.52704987320372e-05, "loss": 0.2925, "step": 1119, "task_loss": 0.4273891746997833 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.18667495250701904, "epoch": 0.95, "learning_rate": 4.5266272189349114e-05, "loss": 0.3111, "step": 1120, "task_loss": 0.15779000520706177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3783668279647827, "epoch": 0.95, "learning_rate": 4.526204564666103e-05, "loss": 0.3265, "step": 1121, "task_loss": 1.1066042184829712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22718198597431183, "epoch": 0.95, "learning_rate": 4.525781910397295e-05, "loss": 0.2758, "step": 1122, "task_loss": 0.3243577182292938 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2897654175758362, "epoch": 0.95, "learning_rate": 4.5253592561284866e-05, "loss": 0.3091, "step": 1123, "task_loss": 0.3035680651664734 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.1746860146522522, "epoch": 0.95, "learning_rate": 4.524936601859679e-05, "loss": 0.2814, "step": 1124, "task_loss": 0.8144105076789856 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29736846685409546, "epoch": 0.95, "learning_rate": 4.524513947590871e-05, "loss": 0.4041, "step": 1125, "task_loss": 0.43958353996276855 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.345153272151947, "epoch": 0.95, "learning_rate": 4.5240912933220625e-05, "loss": 0.3786, "step": 1126, "task_loss": 0.6712506413459778 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.28389960527420044, "epoch": 0.95, "learning_rate": 4.5236686390532545e-05, "loss": 0.336, "step": 1127, "task_loss": 1.4295475482940674 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2105841338634491, "epoch": 0.95, "learning_rate": 4.5232459847844465e-05, "loss": 0.3519, "step": 1128, "task_loss": 0.8832420110702515 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3502531945705414, "epoch": 0.95, "learning_rate": 4.5228233305156384e-05, "loss": 0.3231, "step": 1129, "task_loss": 1.0557340383529663 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35527557134628296, "epoch": 0.95, "learning_rate": 4.5224006762468304e-05, "loss": 0.3625, "step": 1130, "task_loss": 0.874673068523407 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14178301393985748, "epoch": 0.96, "learning_rate": 4.5219780219780224e-05, "loss": 0.3108, "step": 1131, "task_loss": 0.10780816525220871 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.32874321937561035, "epoch": 0.96, "learning_rate": 4.5215553677092144e-05, "loss": 0.3829, "step": 1132, "task_loss": 0.1389910876750946 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20046493411064148, "epoch": 0.96, "learning_rate": 4.5211327134404057e-05, "loss": 0.371, "step": 1133, "task_loss": 0.6660134792327881 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3007514476776123, "epoch": 0.96, "learning_rate": 4.5207100591715976e-05, "loss": 0.3175, "step": 1134, "task_loss": 0.29382359981536865 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4086291790008545, "epoch": 0.96, "learning_rate": 4.5202874049027896e-05, "loss": 0.4597, "step": 1135, "task_loss": 1.5114408731460571 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.26803046464920044, "epoch": 0.96, "learning_rate": 4.5198647506339816e-05, "loss": 0.3112, "step": 1136, "task_loss": 0.731511116027832 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3154323697090149, "epoch": 0.96, "learning_rate": 4.5194420963651736e-05, "loss": 0.3292, "step": 1137, "task_loss": 0.8449059724807739 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.38997378945350647, "epoch": 0.96, "learning_rate": 4.5190194420963655e-05, "loss": 0.3196, "step": 1138, "task_loss": 0.3029818832874298 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23375770449638367, "epoch": 0.96, "learning_rate": 4.518596787827557e-05, "loss": 0.3731, "step": 1139, "task_loss": 0.8964962959289551 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.544218897819519, "epoch": 0.96, "learning_rate": 4.518174133558749e-05, "loss": 0.3918, "step": 1140, "task_loss": 0.3147408664226532 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5715116858482361, "epoch": 0.96, "learning_rate": 4.5177514792899414e-05, "loss": 0.4012, "step": 1141, "task_loss": 0.6181625723838806 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5026450753211975, "epoch": 0.96, "learning_rate": 4.5173288250211334e-05, "loss": 0.4475, "step": 1142, "task_loss": 0.8529657125473022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22553801536560059, "epoch": 0.97, "learning_rate": 4.516906170752325e-05, "loss": 0.3412, "step": 1143, "task_loss": 0.3726401627063751 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.14779837429523468, "epoch": 0.97, "learning_rate": 4.516483516483517e-05, "loss": 0.2942, "step": 1144, "task_loss": 0.22872155904769897 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24052079021930695, "epoch": 0.97, "learning_rate": 4.516060862214709e-05, "loss": 0.2718, "step": 1145, "task_loss": 0.34861892461776733 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23975765705108643, "epoch": 0.97, "learning_rate": 4.5156382079459006e-05, "loss": 0.362, "step": 1146, "task_loss": 1.418960690498352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24894945323467255, "epoch": 0.97, "learning_rate": 4.5152155536770926e-05, "loss": 0.273, "step": 1147, "task_loss": 0.3433225452899933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2631831765174866, "epoch": 0.97, "learning_rate": 4.5147928994082846e-05, "loss": 0.3382, "step": 1148, "task_loss": 0.5902136564254761 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.39580830931663513, "epoch": 0.97, "learning_rate": 4.514370245139476e-05, "loss": 0.3485, "step": 1149, "task_loss": 1.0852302312850952 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35671716928482056, "epoch": 0.97, "learning_rate": 4.513947590870668e-05, "loss": 0.3003, "step": 1150, "task_loss": 0.9312202334403992 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.37598925828933716, "epoch": 0.97, "learning_rate": 4.51352493660186e-05, "loss": 0.2755, "step": 1151, "task_loss": 0.870559573173523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.35334280133247375, "epoch": 0.97, "learning_rate": 4.513102282333052e-05, "loss": 0.3073, "step": 1152, "task_loss": 0.2768174111843109 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2127104252576828, "epoch": 0.97, "learning_rate": 4.512679628064244e-05, "loss": 0.3253, "step": 1153, "task_loss": 0.20750877261161804 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.12464627623558044, "epoch": 0.97, "learning_rate": 4.512256973795436e-05, "loss": 0.3133, "step": 1154, "task_loss": 0.5762190222740173 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3354787230491638, "epoch": 0.98, "learning_rate": 4.511834319526627e-05, "loss": 0.4139, "step": 1155, "task_loss": 0.9044637680053711 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22063955664634705, "epoch": 0.98, "learning_rate": 4.511411665257819e-05, "loss": 0.3418, "step": 1156, "task_loss": 0.23841185867786407 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15205956995487213, "epoch": 0.98, "learning_rate": 4.510989010989011e-05, "loss": 0.2575, "step": 1157, "task_loss": 0.39559870958328247 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.4291304349899292, "epoch": 0.98, "learning_rate": 4.5105663567202036e-05, "loss": 0.2708, "step": 1158, "task_loss": 0.37119153141975403 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19470790028572083, "epoch": 0.98, "learning_rate": 4.510143702451395e-05, "loss": 0.3946, "step": 1159, "task_loss": 0.43265652656555176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3006949722766876, "epoch": 0.98, "learning_rate": 4.509721048182587e-05, "loss": 0.3647, "step": 1160, "task_loss": 0.9573840498924255 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23114867508411407, "epoch": 0.98, "learning_rate": 4.509298393913779e-05, "loss": 0.2867, "step": 1161, "task_loss": 0.268611341714859 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.24773582816123962, "epoch": 0.98, "learning_rate": 4.50887573964497e-05, "loss": 0.3458, "step": 1162, "task_loss": 0.718194842338562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.5117859244346619, "epoch": 0.98, "learning_rate": 4.508453085376163e-05, "loss": 0.4148, "step": 1163, "task_loss": 1.3371446132659912 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3454076051712036, "epoch": 0.98, "learning_rate": 4.508030431107355e-05, "loss": 0.2876, "step": 1164, "task_loss": 0.7379355430603027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.362413227558136, "epoch": 0.98, "learning_rate": 4.507607776838546e-05, "loss": 0.2879, "step": 1165, "task_loss": 0.3431224822998047 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.29363489151000977, "epoch": 0.99, "learning_rate": 4.507185122569738e-05, "loss": 0.2822, "step": 1166, "task_loss": 0.4833914041519165 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2251429408788681, "epoch": 0.99, "learning_rate": 4.50676246830093e-05, "loss": 0.2479, "step": 1167, "task_loss": 0.2708614766597748 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2700488567352295, "epoch": 0.99, "learning_rate": 4.5063398140321214e-05, "loss": 0.3066, "step": 1168, "task_loss": 0.9844657182693481 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6104203462600708, "epoch": 0.99, "learning_rate": 4.505917159763314e-05, "loss": 0.376, "step": 1169, "task_loss": 0.8192672729492188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21359509229660034, "epoch": 0.99, "learning_rate": 4.505494505494506e-05, "loss": 0.2018, "step": 1170, "task_loss": 0.15554118156433105 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.16944999992847443, "epoch": 0.99, "learning_rate": 4.505071851225698e-05, "loss": 0.2056, "step": 1171, "task_loss": 0.14514167606830597 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3679329752922058, "epoch": 0.99, "learning_rate": 4.504649196956889e-05, "loss": 0.2941, "step": 1172, "task_loss": 0.944209098815918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.3078688979148865, "epoch": 0.99, "learning_rate": 4.504226542688081e-05, "loss": 0.311, "step": 1173, "task_loss": 0.24003413319587708 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.21030209958553314, "epoch": 0.99, "learning_rate": 4.503803888419273e-05, "loss": 0.3264, "step": 1174, "task_loss": 0.37533411383628845 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.2800752520561218, "epoch": 0.99, "learning_rate": 4.503381234150465e-05, "loss": 0.3553, "step": 1175, "task_loss": 1.0056970119476318 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.6683984398841858, "epoch": 0.99, "learning_rate": 4.502958579881657e-05, "loss": 0.4299, "step": 1176, "task_loss": 0.6065475940704346 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.15709534287452698, "epoch": 0.99, "learning_rate": 4.502535925612849e-05, "loss": 0.2601, "step": 1177, "task_loss": 0.42998674511909485 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19763796031475067, "epoch": 1.0, "learning_rate": 4.5021132713440404e-05, "loss": 0.3497, "step": 1178, "task_loss": 0.46407046914100647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.36419183015823364, "epoch": 1.0, "learning_rate": 4.5016906170752324e-05, "loss": 0.3657, "step": 1179, "task_loss": 1.5342886447906494 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.22747506201267242, "epoch": 1.0, "learning_rate": 4.501267962806425e-05, "loss": 0.2932, "step": 1180, "task_loss": 0.34386053681373596 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.23093140125274658, "epoch": 1.0, "learning_rate": 4.500845308537616e-05, "loss": 0.2518, "step": 1181, "task_loss": 1.2779991626739502 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.19193525612354279, "epoch": 1.0, "learning_rate": 4.500422654268808e-05, "loss": 0.3346, "step": 1182, "task_loss": 0.13631373643875122 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -Infinity, "compression/movement_sparsity/linear_layer_sparsity": 0.0, "compression/movement_sparsity/model_sparsity": 0.0, "compression_loss": 0.0, "distillation_loss": 0.20304660499095917, "epoch": 1.0, "learning_rate": 4.5e-05, "loss": 0.3005, "step": 1183, "task_loss": 0.3247835338115692 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0, "compression/movement_sparsity/importance_threshold": -0.0047100442461669445, "compression/movement_sparsity/linear_layer_sparsity": 0.0010019401097710254, "compression/movement_sparsity/model_sparsity": 0.0009675203846720848, "compression_loss": 0.0, "distillation_loss": 0.3937605023384094, "epoch": 1.0, "learning_rate": 4.4995773457311916e-05, "loss": 0.5788, "step": 1184, "task_loss": 0.9173633456230164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.0037974665492690463, "compression/movement_sparsity/importance_threshold": -0.004698120089186681, "compression/movement_sparsity/linear_layer_sparsity": 0.0010110978705153682, "compression/movement_sparsity/model_sparsity": 0.0009763635481622922, "compression_loss": 0.4102906286716461, "distillation_loss": 0.3808823823928833, "epoch": 1.0, "learning_rate": 4.4991546914623836e-05, "loss": 0.7763, "step": 1185, "task_loss": 0.8962676525115967 }, { "compression/movement_sparsity/importance_regularization_factor": 0.007588518460622751, "compression/movement_sparsity/importance_threshold": -0.004686216074358686, "compression/movement_sparsity/linear_layer_sparsity": 0.0010141504574301492, "compression/movement_sparsity/model_sparsity": 0.0009793112693256946, "compression_loss": 0.8198877573013306, "distillation_loss": 0.3202807903289795, "epoch": 1.0, "learning_rate": 4.498732037193576e-05, "loss": 1.1626, "step": 1186, "task_loss": 0.41993218660354614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.011373161156410516, "compression/movement_sparsity/importance_threshold": -0.004674332184656624, "compression/movement_sparsity/linear_layer_sparsity": 0.001023308218174492, "compression/movement_sparsity/model_sparsity": 0.000988154432815902, "compression_loss": 1.2287936210632324, "distillation_loss": 0.44650501012802124, "epoch": 1.0, "learning_rate": 4.498309382924768e-05, "loss": 1.5534, "step": 1187, "task_loss": 0.715635359287262 }, { "compression/movement_sparsity/importance_regularization_factor": 0.015151400058979747, "compression/movement_sparsity/importance_threshold": -0.004662468403054164, "compression/movement_sparsity/linear_layer_sparsity": 0.001037056783458642, "compression/movement_sparsity/model_sparsity": 0.0010014306925870075, "compression_loss": 1.63700532913208, "distillation_loss": 0.6858876943588257, "epoch": 1.0, "learning_rate": 4.4978867286559595e-05, "loss": 2.1211, "step": 1188, "task_loss": 0.6914085149765015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.018923240590679014, "compression/movement_sparsity/importance_threshold": -0.004650624712524971, "compression/movement_sparsity/linear_layer_sparsity": 0.0010706352395212322, "compression/movement_sparsity/model_sparsity": 0.0010338556253844343, "compression_loss": 2.044524908065796, "distillation_loss": 0.3964264690876007, "epoch": 1.01, "learning_rate": 4.4974640743871514e-05, "loss": 2.3648, "step": 1189, "task_loss": 0.3107689917087555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.022688688173855553, "compression/movement_sparsity/importance_threshold": -0.004638801096042715, "compression/movement_sparsity/linear_layer_sparsity": 0.0011042136955838225, "compression/movement_sparsity/model_sparsity": 0.0010662805581818612, "compression_loss": 2.4513514041900635, "distillation_loss": 0.481489896774292, "epoch": 1.01, "learning_rate": 4.4970414201183434e-05, "loss": 2.8931, "step": 1190, "task_loss": 0.24588356912136078 }, { "compression/movement_sparsity/importance_regularization_factor": 0.026447748230858936, "compression/movement_sparsity/importance_threshold": -0.004626997536581058, "compression/movement_sparsity/linear_layer_sparsity": 0.0011133714563281653, "compression/movement_sparsity/model_sparsity": 0.0010751237216720686, "compression_loss": 2.857487678527832, "distillation_loss": 0.2723100781440735, "epoch": 1.01, "learning_rate": 4.4966187658495354e-05, "loss": 3.1045, "step": 1191, "task_loss": 0.393263578414917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.030200426184036733, "compression/movement_sparsity/importance_threshold": -0.00461521401711367, "compression/movement_sparsity/linear_layer_sparsity": 0.0011486908408655915, "compression/movement_sparsity/model_sparsity": 0.0011092297766954985, "compression_loss": 3.262927770614624, "distillation_loss": 0.26160818338394165, "epoch": 1.01, "learning_rate": 4.4961961115807274e-05, "loss": 3.6934, "step": 1192, "task_loss": 0.5637128949165344 }, { "compression/movement_sparsity/importance_regularization_factor": 0.033946727455736514, "compression/movement_sparsity/importance_threshold": -0.004603450520614219, "compression/movement_sparsity/linear_layer_sparsity": 0.0011914389818401603, "compression/movement_sparsity/model_sparsity": 0.0011505093875189272, "compression_loss": 3.6676766872406006, "distillation_loss": 0.34008169174194336, "epoch": 1.01, "learning_rate": 4.4957734573119193e-05, "loss": 4.0452, "step": 1193, "task_loss": 0.8559937477111816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.03768665746830768, "compression/movement_sparsity/importance_threshold": -0.004591707030056367, "compression/movement_sparsity/linear_layer_sparsity": 0.001225005513735115, "compression/movement_sparsity/model_sparsity": 0.0011829228057805596, "compression_loss": 4.071734428405762, "distillation_loss": 0.30722981691360474, "epoch": 1.01, "learning_rate": 4.4953508030431106e-05, "loss": 4.4792, "step": 1194, "task_loss": 1.1641637086868286 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04142022164409781, "compression/movement_sparsity/importance_threshold": -0.0045799835284137825, "compression/movement_sparsity/linear_layer_sparsity": 0.0012463855463062173, "compression/movement_sparsity/model_sparsity": 0.0012035683684601711, "compression_loss": 4.475098133087158, "distillation_loss": 0.17917898297309875, "epoch": 1.01, "learning_rate": 4.4949281487743026e-05, "loss": 4.8255, "step": 1195, "task_loss": 0.061687059700489044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04514742540545447, "compression/movement_sparsity/importance_threshold": -0.004568279998660136, "compression/movement_sparsity/linear_layer_sparsity": 0.0012891217631131502, "compression/movement_sparsity/model_sparsity": 0.0012448364647478054, "compression_loss": 4.877766132354736, "distillation_loss": 0.24638590216636658, "epoch": 1.01, "learning_rate": 4.4945054945054946e-05, "loss": 5.2702, "step": 1196, "task_loss": 0.36251261830329895 }, { "compression/movement_sparsity/importance_regularization_factor": 0.04886827417472722, "compression/movement_sparsity/importance_threshold": -0.00455659642376909, "compression/movement_sparsity/linear_layer_sparsity": 0.0012830285134512243, "compression/movement_sparsity/model_sparsity": 0.001238952536956795, "compression_loss": 5.279744625091553, "distillation_loss": 0.44648393988609314, "epoch": 1.01, "learning_rate": 4.4940828402366866e-05, "loss": 5.6236, "step": 1197, "task_loss": 0.6393554210662842 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05258277337426348, "compression/movement_sparsity/importance_threshold": -0.004544932786714311, "compression/movement_sparsity/linear_layer_sparsity": 0.0013074492087694718, "compression/movement_sparsity/model_sparsity": 0.0012625343062640146, "compression_loss": 5.681037425994873, "distillation_loss": 0.2602553963661194, "epoch": 1.01, "learning_rate": 4.4936601859678785e-05, "loss": 5.9757, "step": 1198, "task_loss": 0.5741052031517029 }, { "compression/movement_sparsity/importance_regularization_factor": 0.05629092842641148, "compression/movement_sparsity/importance_threshold": -0.004533289070469468, "compression/movement_sparsity/linear_layer_sparsity": 0.001329008103855112, "compression/movement_sparsity/model_sparsity": 0.0012833525869805442, "compression_loss": 6.081630706787109, "distillation_loss": 0.1991117298603058, "epoch": 1.01, "learning_rate": 4.4932375316990705e-05, "loss": 6.3822, "step": 1199, "task_loss": 1.0478651523590088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.059992744753518956, "compression/movement_sparsity/importance_threshold": -0.004521665258008229, "compression/movement_sparsity/linear_layer_sparsity": 0.0013919915573077405, "compression/movement_sparsity/model_sparsity": 0.0013441723650473088, "compression_loss": 6.481531620025635, "distillation_loss": 0.3642633855342865, "epoch": 1.01, "learning_rate": 4.4928148774302625e-05, "loss": 6.8904, "step": 1200, "task_loss": 0.6244286298751831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06368822777793498, "compression/movement_sparsity/importance_threshold": -0.004510061332304256, "compression/movement_sparsity/linear_layer_sparsity": 0.001468902438559057, "compression/movement_sparsity/model_sparsity": 0.0014184411209220969, "compression_loss": 6.880734443664551, "distillation_loss": 0.23397861421108246, "epoch": 1.02, "learning_rate": 4.492392223161454e-05, "loss": 7.206, "step": 1201, "task_loss": 0.36994272470474243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.06737738292200746, "compression/movement_sparsity/importance_threshold": -0.004498477276331219, "compression/movement_sparsity/linear_layer_sparsity": 0.0015038402497321356, "compression/movement_sparsity/model_sparsity": 0.0014521787108001014, "compression_loss": 7.2792439460754395, "distillation_loss": 0.30487754940986633, "epoch": 1.02, "learning_rate": 4.491969568892646e-05, "loss": 7.6558, "step": 1202, "task_loss": 0.596557080745697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07106021560808395, "compression/movement_sparsity/importance_threshold": -0.004486913073062785, "compression/movement_sparsity/linear_layer_sparsity": 0.0015490686175749638, "compression/movement_sparsity/model_sparsity": 0.0014958533450687946, "compression_loss": 7.67705774307251, "distillation_loss": 0.20317770540714264, "epoch": 1.02, "learning_rate": 4.4915469146238384e-05, "loss": 8.0139, "step": 1203, "task_loss": 0.9098516702651978 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07473673125851371, "compression/movement_sparsity/importance_threshold": -0.004475368705472619, "compression/movement_sparsity/linear_layer_sparsity": 0.0015843760779447544, "compression/movement_sparsity/model_sparsity": 0.00152994788555643, "compression_loss": 8.07417106628418, "distillation_loss": 0.2673817574977875, "epoch": 1.02, "learning_rate": 4.49112426035503e-05, "loss": 8.4658, "step": 1204, "task_loss": 0.8123477101325989 }, { "compression/movement_sparsity/importance_regularization_factor": 0.07840693529564446, "compression/movement_sparsity/importance_threshold": -0.0044638441565343885, "compression/movement_sparsity/linear_layer_sparsity": 0.0016935179843158084, "compression/movement_sparsity/model_sparsity": 0.001635340431683862, "compression_loss": 8.470596313476562, "distillation_loss": 0.3441730737686157, "epoch": 1.02, "learning_rate": 4.490701606086222e-05, "loss": 8.7867, "step": 1205, "task_loss": 0.21049267053604126 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08207083314182378, "compression/movement_sparsity/importance_threshold": -0.004452339409221761, "compression/movement_sparsity/linear_layer_sparsity": 0.0017871942452631483, "compression/movement_sparsity/model_sparsity": 0.0017257986248857744, "compression_loss": 8.866316795349121, "distillation_loss": 0.3460751175880432, "epoch": 1.02, "learning_rate": 4.4902789518174136e-05, "loss": 9.2504, "step": 1206, "task_loss": 0.3923206031322479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08572843021940091, "compression/movement_sparsity/importance_threshold": -0.0044408544465084025, "compression/movement_sparsity/linear_layer_sparsity": 0.001922652789606552, "compression/movement_sparsity/model_sparsity": 0.0018566037515117578, "compression_loss": 9.26134967803955, "distillation_loss": 0.2772218585014343, "epoch": 1.02, "learning_rate": 4.489856297548605e-05, "loss": 9.5127, "step": 1207, "task_loss": 0.49269193410873413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.08937973195072357, "compression/movement_sparsity/importance_threshold": -0.004429389251367978, "compression/movement_sparsity/linear_layer_sparsity": 0.00202417515285829, "compression/movement_sparsity/model_sparsity": 0.0019546385092664782, "compression_loss": 9.655686378479004, "distillation_loss": 0.305117666721344, "epoch": 1.02, "learning_rate": 4.4894336432797976e-05, "loss": 9.986, "step": 1208, "task_loss": 1.6477200984954834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09302474375814002, "compression/movement_sparsity/importance_threshold": -0.004417943806774157, "compression/movement_sparsity/linear_layer_sparsity": 0.002192055509003605, "compression/movement_sparsity/model_sparsity": 0.002116751658717818, "compression_loss": 10.049321174621582, "distillation_loss": 0.2753101587295532, "epoch": 1.02, "learning_rate": 4.4890109890109896e-05, "loss": 10.3471, "step": 1209, "task_loss": 0.9186722636222839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.09666347106399781, "compression/movement_sparsity/importance_threshold": -0.004406518095700606, "compression/movement_sparsity/linear_layer_sparsity": 0.0023940866812580323, "compression/movement_sparsity/model_sparsity": 0.002311842438684723, "compression_loss": 10.442254066467285, "distillation_loss": 0.4144275188446045, "epoch": 1.02, "learning_rate": 4.488588334742181e-05, "loss": 10.8568, "step": 1210, "task_loss": 0.8719772696495056 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10029591929064618, "compression/movement_sparsity/importance_threshold": -0.00439511210112099, "compression/movement_sparsity/linear_layer_sparsity": 0.0025583659387773167, "compression/movement_sparsity/model_sparsity": 0.002470478198326111, "compression_loss": 10.834500312805176, "distillation_loss": 0.4396495819091797, "epoch": 1.02, "learning_rate": 4.488165680473373e-05, "loss": 11.262, "step": 1211, "task_loss": 0.9327820539474487 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10392209386043288, "compression/movement_sparsity/importance_threshold": -0.0043837258060089755, "compression/movement_sparsity/linear_layer_sparsity": 0.002723241404678395, "compression/movement_sparsity/model_sparsity": 0.0026296896847572268, "compression_loss": 11.226054191589355, "distillation_loss": 0.4338788688182831, "epoch": 1.02, "learning_rate": 4.487743026204565e-05, "loss": 11.5975, "step": 1212, "task_loss": 0.75318443775177 }, { "compression/movement_sparsity/importance_regularization_factor": 0.10754200019570564, "compression/movement_sparsity/importance_threshold": -0.004372359193338233, "compression/movement_sparsity/linear_layer_sparsity": 0.002939414639748955, "compression/movement_sparsity/model_sparsity": 0.002838436704176457, "compression_loss": 11.616912841796875, "distillation_loss": 0.20085662603378296, "epoch": 1.03, "learning_rate": 4.487320371935757e-05, "loss": 11.8302, "step": 1213, "task_loss": 0.13112294673919678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11115564371881353, "compression/movement_sparsity/importance_threshold": -0.0043610122460824245, "compression/movement_sparsity/linear_layer_sparsity": 0.0032813043742044197, "compression/movement_sparsity/model_sparsity": 0.0031685814744775304, "compression_loss": 12.007067680358887, "distillation_loss": 0.2116384506225586, "epoch": 1.03, "learning_rate": 4.486897717666949e-05, "loss": 12.3508, "step": 1214, "task_loss": 0.383533775806427 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11476302985210429, "compression/movement_sparsity/importance_threshold": -0.004349684947215218, "compression/movement_sparsity/linear_layer_sparsity": 0.003593240599558596, "compression/movement_sparsity/model_sparsity": 0.003469801730862718, "compression_loss": 12.396533012390137, "distillation_loss": 0.2002447545528412, "epoch": 1.03, "learning_rate": 4.486475063398141e-05, "loss": 12.7334, "step": 1215, "task_loss": 0.9297578930854797 }, { "compression/movement_sparsity/importance_regularization_factor": 0.11836416401792565, "compression/movement_sparsity/importance_threshold": -0.004338377279710284, "compression/movement_sparsity/linear_layer_sparsity": 0.0038534736340436705, "compression/movement_sparsity/model_sparsity": 0.003721094960042776, "compression_loss": 12.785306930541992, "distillation_loss": 0.30430400371551514, "epoch": 1.03, "learning_rate": 4.486052409129333e-05, "loss": 13.2453, "step": 1216, "task_loss": 0.3592604398727417 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12195905163862686, "compression/movement_sparsity/importance_threshold": -0.004327089226541283, "compression/movement_sparsity/linear_layer_sparsity": 0.0041488114180487255, "compression/movement_sparsity/model_sparsity": 0.004006286982601963, "compression_loss": 13.173396110534668, "distillation_loss": 0.24041709303855896, "epoch": 1.03, "learning_rate": 4.485629754860524e-05, "loss": 13.4677, "step": 1217, "task_loss": 0.45772600173950195 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1255476981365553, "compression/movement_sparsity/importance_threshold": -0.004315820770681887, "compression/movement_sparsity/linear_layer_sparsity": 0.004499882761583804, "compression/movement_sparsity/model_sparsity": 0.004345297945464833, "compression_loss": 13.560797691345215, "distillation_loss": 0.21942958235740662, "epoch": 1.03, "learning_rate": 4.485207100591716e-05, "loss": 13.8709, "step": 1218, "task_loss": 0.7078566551208496 }, { "compression/movement_sparsity/importance_regularization_factor": 0.12913010893405957, "compression/movement_sparsity/importance_threshold": -0.004304571895105759, "compression/movement_sparsity/linear_layer_sparsity": 0.004916179302087054, "compression/movement_sparsity/model_sparsity": 0.004747293419123841, "compression_loss": 13.94749927520752, "distillation_loss": 0.3022770881652832, "epoch": 1.03, "learning_rate": 4.484784446322908e-05, "loss": 14.2599, "step": 1219, "task_loss": 0.40298476815223694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1327062894534874, "compression/movement_sparsity/importance_threshold": -0.004293342582786569, "compression/movement_sparsity/linear_layer_sparsity": 0.005411473453177895, "compression/movement_sparsity/model_sparsity": 0.005225572692421682, "compression_loss": 14.333516120910645, "distillation_loss": 0.1827034056186676, "epoch": 1.03, "learning_rate": 4.4843617920541e-05, "loss": 14.6496, "step": 1220, "task_loss": 0.6569167971611023 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1362762451171875, "compression/movement_sparsity/importance_threshold": -0.004282132816697981, "compression/movement_sparsity/linear_layer_sparsity": 0.005854289342503305, "compression/movement_sparsity/model_sparsity": 0.005653176493687749, "compression_loss": 14.718843460083008, "distillation_loss": 0.31411004066467285, "epoch": 1.03, "learning_rate": 4.483939137785292e-05, "loss": 15.0359, "step": 1221, "task_loss": 1.0844365358352661 }, { "compression/movement_sparsity/importance_regularization_factor": 0.13983998134750797, "compression/movement_sparsity/importance_threshold": -0.004270942579813663, "compression/movement_sparsity/linear_layer_sparsity": 0.006360076761113706, "compression/movement_sparsity/model_sparsity": 0.006141588558484785, "compression_loss": 15.103476524353027, "distillation_loss": 0.2729410231113434, "epoch": 1.03, "learning_rate": 4.483516483516484e-05, "loss": 15.4742, "step": 1222, "task_loss": 0.5998313426971436 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14339750356679637, "compression/movement_sparsity/importance_threshold": -0.004259771855107283, "compression/movement_sparsity/linear_layer_sparsity": 0.006962223378389516, "compression/movement_sparsity/model_sparsity": 0.006723049587037506, "compression_loss": 15.48742961883545, "distillation_loss": 0.374523401260376, "epoch": 1.03, "learning_rate": 4.483093829247675e-05, "loss": 15.8628, "step": 1223, "task_loss": 0.2658517062664032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.14694881719740194, "compression/movement_sparsity/importance_threshold": -0.004248620625552504, "compression/movement_sparsity/linear_layer_sparsity": 0.007452366289061665, "compression/movement_sparsity/model_sparsity": 0.007196354580872106, "compression_loss": 15.870694160461426, "distillation_loss": 0.45831477642059326, "epoch": 1.03, "learning_rate": 4.482671174978867e-05, "loss": 16.2698, "step": 1224, "task_loss": 1.0425890684127808 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15049392766167224, "compression/movement_sparsity/importance_threshold": -0.0042374888741229955, "compression/movement_sparsity/linear_layer_sparsity": 0.008002809715468373, "compression/movement_sparsity/model_sparsity": 0.007727888582219698, "compression_loss": 16.253276824951172, "distillation_loss": 0.2757789194583893, "epoch": 1.04, "learning_rate": 4.48224852071006e-05, "loss": 16.5561, "step": 1225, "task_loss": 0.605686366558075 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1540328403819557, "compression/movement_sparsity/importance_threshold": -0.004226376583792424, "compression/movement_sparsity/linear_layer_sparsity": 0.008642732095814598, "compression/movement_sparsity/model_sparsity": 0.008345827660169525, "compression_loss": 16.635168075561523, "distillation_loss": 0.3095744848251343, "epoch": 1.04, "learning_rate": 4.481825866441251e-05, "loss": 16.9931, "step": 1226, "task_loss": 0.15681417286396027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.15756556078059986, "compression/movement_sparsity/importance_threshold": -0.0042152837375344565, "compression/movement_sparsity/linear_layer_sparsity": 0.009174275716518463, "compression/movement_sparsity/model_sparsity": 0.00885911112228277, "compression_loss": 17.016376495361328, "distillation_loss": 0.23931270837783813, "epoch": 1.04, "learning_rate": 4.481403212172443e-05, "loss": 17.3445, "step": 1227, "task_loss": 0.47263094782829285 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1610920942799538, "compression/movement_sparsity/importance_threshold": -0.004204210318322759, "compression/movement_sparsity/linear_layer_sparsity": 0.009869681248874359, "compression/movement_sparsity/model_sparsity": 0.009530627335284595, "compression_loss": 17.39688491821289, "distillation_loss": 0.48166126012802124, "epoch": 1.04, "learning_rate": 4.480980557903635e-05, "loss": 17.8271, "step": 1228, "task_loss": 1.272774338722229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.16461244630236543, "compression/movement_sparsity/importance_threshold": -0.004193156309130997, "compression/movement_sparsity/linear_layer_sparsity": 0.010668123513771745, "compression/movement_sparsity/model_sparsity": 0.010301640652087047, "compression_loss": 17.776710510253906, "distillation_loss": 0.2593509554862976, "epoch": 1.04, "learning_rate": 4.480557903634827e-05, "loss": 18.0733, "step": 1229, "task_loss": 0.23353984951972961 }, { "compression/movement_sparsity/importance_regularization_factor": 0.1681266222701825, "compression/movement_sparsity/importance_threshold": -0.00418212169293284, "compression/movement_sparsity/linear_layer_sparsity": 0.011291614391115751, "compression/movement_sparsity/model_sparsity": 0.010903712699711994, "compression_loss": 18.1558837890625, "distillation_loss": 0.46332481503486633, "epoch": 1.04, "learning_rate": 4.480135249366019e-05, "loss": 18.6271, "step": 1230, "task_loss": 0.2720649242401123 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17163462760575404, "compression/movement_sparsity/importance_threshold": -0.004171106452701952, "compression/movement_sparsity/linear_layer_sparsity": 0.011990084434554063, "compression/movement_sparsity/model_sparsity": 0.011578188148413016, "compression_loss": 18.534366607666016, "distillation_loss": 0.5338602066040039, "epoch": 1.04, "learning_rate": 4.479712595097211e-05, "loss": 19.0769, "step": 1231, "task_loss": 0.370920330286026 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17513646773142766, "compression/movement_sparsity/importance_threshold": -0.004160110571412002, "compression/movement_sparsity/linear_layer_sparsity": 0.012780895232164498, "compression/movement_sparsity/model_sparsity": 0.012341832162306962, "compression_loss": 18.912193298339844, "distillation_loss": 0.8660389184951782, "epoch": 1.04, "learning_rate": 4.479289940828403e-05, "loss": 19.4349, "step": 1232, "task_loss": 1.3601912260055542 }, { "compression/movement_sparsity/importance_regularization_factor": 0.17863214806955124, "compression/movement_sparsity/importance_threshold": -0.004149134032036657, "compression/movement_sparsity/linear_layer_sparsity": 0.013379965414190256, "compression/movement_sparsity/model_sparsity": 0.012920322440624691, "compression_loss": 19.289379119873047, "distillation_loss": 0.4159047603607178, "epoch": 1.04, "learning_rate": 4.478867286559594e-05, "loss": 19.9182, "step": 1233, "task_loss": 1.1996432542800903 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18212167404247404, "compression/movement_sparsity/importance_threshold": -0.0041381768175495805, "compression/movement_sparsity/linear_layer_sparsity": 0.013985701205924461, "compression/movement_sparsity/model_sparsity": 0.01350524934445157, "compression_loss": 19.665891647338867, "distillation_loss": 0.910930871963501, "epoch": 1.04, "learning_rate": 4.478444632290786e-05, "loss": 20.2966, "step": 1234, "task_loss": 0.3422808051109314 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18560505107254344, "compression/movement_sparsity/importance_threshold": -0.004127238910924441, "compression/movement_sparsity/linear_layer_sparsity": 0.01467253326175017, "compression/movement_sparsity/model_sparsity": 0.01416848660621712, "compression_loss": 20.04172706604004, "distillation_loss": 0.5755290985107422, "epoch": 1.04, "learning_rate": 4.478021978021978e-05, "loss": 20.5837, "step": 1235, "task_loss": 0.8419200778007507 }, { "compression/movement_sparsity/importance_regularization_factor": 0.18908228458210785, "compression/movement_sparsity/importance_threshold": -0.004116320295134906, "compression/movement_sparsity/linear_layer_sparsity": 0.015490256905714747, "compression/movement_sparsity/model_sparsity": 0.014958118927399344, "compression_loss": 20.416900634765625, "distillation_loss": 0.6663327813148499, "epoch": 1.04, "learning_rate": 4.47759932375317e-05, "loss": 20.9608, "step": 1236, "task_loss": 0.8463159203529358 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19255337999351502, "compression/movement_sparsity/importance_threshold": -0.004105420953154642, "compression/movement_sparsity/linear_layer_sparsity": 0.016359695664716064, "compression/movement_sparsity/model_sparsity": 0.01579768979032249, "compression_loss": 20.791406631469727, "distillation_loss": 0.5115723609924316, "epoch": 1.05, "learning_rate": 4.477176669484362e-05, "loss": 21.2891, "step": 1237, "task_loss": 0.6861749887466431 }, { "compression/movement_sparsity/importance_regularization_factor": 0.196018342729114, "compression/movement_sparsity/importance_threshold": -0.004094540867957315, "compression/movement_sparsity/linear_layer_sparsity": 0.017066751108852196, "compression/movement_sparsity/model_sparsity": 0.01648045570479558, "compression_loss": 21.165239334106445, "distillation_loss": 0.4414406418800354, "epoch": 1.05, "learning_rate": 4.476754015215554e-05, "loss": 21.701, "step": 1238, "task_loss": 0.6631407141685486 }, { "compression/movement_sparsity/importance_regularization_factor": 0.19947717821125255, "compression/movement_sparsity/importance_threshold": -0.004083680022516592, "compression/movement_sparsity/linear_layer_sparsity": 0.017810628382647874, "compression/movement_sparsity/model_sparsity": 0.017198778505802214, "compression_loss": 21.53841209411621, "distillation_loss": 0.17983829975128174, "epoch": 1.05, "learning_rate": 4.4763313609467454e-05, "loss": 22.1237, "step": 1239, "task_loss": 0.0907113328576088 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20292989186227856, "compression/movement_sparsity/importance_threshold": -0.004072838399806141, "compression/movement_sparsity/linear_layer_sparsity": 0.018584137213018673, "compression/movement_sparsity/model_sparsity": 0.01794571492825828, "compression_loss": 21.910913467407227, "distillation_loss": 0.4408838152885437, "epoch": 1.05, "learning_rate": 4.4759087066779374e-05, "loss": 22.364, "step": 1240, "task_loss": 0.6799322366714478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20637648910454076, "compression/movement_sparsity/importance_threshold": -0.004062015982799626, "compression/movement_sparsity/linear_layer_sparsity": 0.01916205392165841, "compression/movement_sparsity/model_sparsity": 0.018503778420076496, "compression_loss": 22.282758712768555, "distillation_loss": 0.3418844938278198, "epoch": 1.05, "learning_rate": 4.4754860524091293e-05, "loss": 22.7301, "step": 1241, "task_loss": 0.20166003704071045 }, { "compression/movement_sparsity/importance_regularization_factor": 0.20981697536038724, "compression/movement_sparsity/importance_threshold": -0.004051212754470715, "compression/movement_sparsity/linear_layer_sparsity": 0.019898681301531482, "compression/movement_sparsity/model_sparsity": 0.019215100383320048, "compression_loss": 22.653905868530273, "distillation_loss": 0.47465944290161133, "epoch": 1.05, "learning_rate": 4.475063398140321e-05, "loss": 23.1505, "step": 1242, "task_loss": 0.9753168225288391 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21325135605216555, "compression/movement_sparsity/importance_threshold": -0.004040428697793077, "compression/movement_sparsity/linear_layer_sparsity": 0.020610351398542694, "compression/movement_sparsity/model_sparsity": 0.019902322423145626, "compression_loss": 23.02440643310547, "distillation_loss": 0.8669644594192505, "epoch": 1.05, "learning_rate": 4.474640743871513e-05, "loss": 23.6245, "step": 1243, "task_loss": 1.547431468963623 }, { "compression/movement_sparsity/importance_regularization_factor": 0.21667963660222495, "compression/movement_sparsity/importance_threshold": -0.004029663795740375, "compression/movement_sparsity/linear_layer_sparsity": 0.0214038451338712, "compression/movement_sparsity/model_sparsity": 0.020668557207593342, "compression_loss": 23.394229888916016, "distillation_loss": 0.4561890661716461, "epoch": 1.05, "learning_rate": 4.474218089602705e-05, "loss": 23.8506, "step": 1244, "task_loss": 0.7799176573753357 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22010182243291299, "compression/movement_sparsity/importance_threshold": -0.004018918031286277, "compression/movement_sparsity/linear_layer_sparsity": 0.022140114788715196, "compression/movement_sparsity/model_sparsity": 0.02137953373476306, "compression_loss": 23.763362884521484, "distillation_loss": 0.5162093639373779, "epoch": 1.05, "learning_rate": 4.473795435333897e-05, "loss": 24.4094, "step": 1245, "task_loss": 0.8172413110733032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2235179189665779, "compression/movement_sparsity/importance_threshold": -0.004008191387404452, "compression/movement_sparsity/linear_layer_sparsity": 0.023022717828786506, "compression/movement_sparsity/model_sparsity": 0.02223181664520338, "compression_loss": 24.1318359375, "distillation_loss": 0.45048242807388306, "epoch": 1.05, "learning_rate": 4.4733727810650885e-05, "loss": 24.7289, "step": 1246, "task_loss": 0.6320061087608337 }, { "compression/movement_sparsity/importance_regularization_factor": 0.22692793162556762, "compression/movement_sparsity/importance_threshold": -0.003997483847068564, "compression/movement_sparsity/linear_layer_sparsity": 0.023806863016688492, "compression/movement_sparsity/model_sparsity": 0.022989024033588178, "compression_loss": 24.4996337890625, "distillation_loss": 0.34416663646698, "epoch": 1.05, "learning_rate": 4.472950126796281e-05, "loss": 25.1268, "step": 1247, "task_loss": 0.700141191482544 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23033186583223103, "compression/movement_sparsity/importance_threshold": -0.00398679539325228, "compression/movement_sparsity/linear_layer_sparsity": 0.024782951531024968, "compression/movement_sparsity/model_sparsity": 0.023931580904657697, "compression_loss": 24.866764068603516, "distillation_loss": 0.6046329140663147, "epoch": 1.05, "learning_rate": 4.472527472527473e-05, "loss": 25.4093, "step": 1248, "task_loss": 0.9314168691635132 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23372972700891603, "compression/movement_sparsity/importance_threshold": -0.003976126008929267, "compression/movement_sparsity/linear_layer_sparsity": 0.025749691497934924, "compression/movement_sparsity/model_sparsity": 0.0248651103796643, "compression_loss": 25.233219146728516, "distillation_loss": 0.5717558860778809, "epoch": 1.06, "learning_rate": 4.4721048182586645e-05, "loss": 25.6036, "step": 1249, "task_loss": 0.4934878647327423 }, { "compression/movement_sparsity/importance_regularization_factor": 0.23712152057797037, "compression/movement_sparsity/importance_threshold": -0.003965475677073194, "compression/movement_sparsity/linear_layer_sparsity": 0.026788536906538946, "compression/movement_sparsity/model_sparsity": 0.025868268252620487, "compression_loss": 25.599010467529297, "distillation_loss": 0.4805406928062439, "epoch": 1.06, "learning_rate": 4.4716821639898564e-05, "loss": 26.1047, "step": 1250, "task_loss": 0.6133074760437012 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24050725196174294, "compression/movement_sparsity/importance_threshold": -0.003954844380657724, "compression/movement_sparsity/linear_layer_sparsity": 0.0276484124830963, "compression/movement_sparsity/model_sparsity": 0.026698604457836412, "compression_loss": 25.964120864868164, "distillation_loss": 0.7605350613594055, "epoch": 1.06, "learning_rate": 4.4712595097210484e-05, "loss": 26.4628, "step": 1251, "task_loss": 0.9141960144042969 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24388692658258165, "compression/movement_sparsity/importance_threshold": -0.003944232102656525, "compression/movement_sparsity/linear_layer_sparsity": 0.028573918678321444, "compression/movement_sparsity/model_sparsity": 0.02759231666806549, "compression_loss": 26.328554153442383, "distillation_loss": 0.3859879970550537, "epoch": 1.06, "learning_rate": 4.4708368554522404e-05, "loss": 26.8008, "step": 1252, "task_loss": 1.35304594039917 }, { "compression/movement_sparsity/importance_regularization_factor": 0.24726054986283408, "compression/movement_sparsity/importance_threshold": -0.0039336388260432665, "compression/movement_sparsity/linear_layer_sparsity": 0.029429048436159722, "compression/movement_sparsity/model_sparsity": 0.02841807008803519, "compression_loss": 26.692289352416992, "distillation_loss": 0.5085939764976501, "epoch": 1.06, "learning_rate": 4.4704142011834324e-05, "loss": 27.3367, "step": 1253, "task_loss": 1.047324538230896 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2506281272248496, "compression/movement_sparsity/importance_threshold": -0.003923064533791611, "compression/movement_sparsity/linear_layer_sparsity": 0.030428985285767925, "compression/movement_sparsity/model_sparsity": 0.029383656030693792, "compression_loss": 27.055377960205078, "distillation_loss": 0.5203262567520142, "epoch": 1.06, "learning_rate": 4.469991546914624e-05, "loss": 27.6316, "step": 1254, "task_loss": 0.8381924629211426 }, { "compression/movement_sparsity/importance_regularization_factor": 0.25398966409097584, "compression/movement_sparsity/importance_threshold": -0.003912509208875228, "compression/movement_sparsity/linear_layer_sparsity": 0.03158801437997381, "compression/movement_sparsity/model_sparsity": 0.030502868909923157, "compression_loss": 27.417831420898438, "distillation_loss": 0.5855239629745483, "epoch": 1.06, "learning_rate": 4.4695688926458156e-05, "loss": 27.9559, "step": 1255, "task_loss": 1.0134457349777222 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2573451658835608, "compression/movement_sparsity/importance_threshold": -0.003901972834267782, "compression/movement_sparsity/linear_layer_sparsity": 0.03247214371350251, "compression/movement_sparsity/model_sparsity": 0.03135662568094518, "compression_loss": 27.779600143432617, "distillation_loss": 0.6592015624046326, "epoch": 1.06, "learning_rate": 4.4691462383770076e-05, "loss": 28.3672, "step": 1256, "task_loss": 0.5244863629341125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26069463802495263, "compression/movement_sparsity/importance_threshold": -0.0038914553929429428, "compression/movement_sparsity/linear_layer_sparsity": 0.03370583002127653, "compression/movement_sparsity/model_sparsity": 0.03254793106878416, "compression_loss": 28.14071273803711, "distillation_loss": 0.6236889958381653, "epoch": 1.06, "learning_rate": 4.4687235841081996e-05, "loss": 28.6155, "step": 1257, "task_loss": 0.9864320755004883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2640380859375, "compression/movement_sparsity/importance_threshold": -0.0038809568678743744, "compression/movement_sparsity/linear_layer_sparsity": 0.03460236048914653, "compression/movement_sparsity/model_sparsity": 0.033413662957032506, "compression_loss": 28.501176834106445, "distillation_loss": 0.4905117154121399, "epoch": 1.06, "learning_rate": 4.4683009298393915e-05, "loss": 28.9498, "step": 1258, "task_loss": 0.5909712910652161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.26737551504355084, "compression/movement_sparsity/importance_threshold": -0.0038704772420357445, "compression/movement_sparsity/linear_layer_sparsity": 0.035666783237329476, "compression/movement_sparsity/model_sparsity": 0.034441519509267986, "compression_loss": 28.860977172851562, "distillation_loss": 0.2632543742656708, "epoch": 1.06, "learning_rate": 4.4678782755705835e-05, "loss": 29.2396, "step": 1259, "task_loss": 0.026146475225687027 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27070693076545305, "compression/movement_sparsity/importance_threshold": -0.003860016498400721, "compression/movement_sparsity/linear_layer_sparsity": 0.0368348031539328, "compression/movement_sparsity/model_sparsity": 0.035569414348486435, "compression_loss": 29.220075607299805, "distillation_loss": 0.6554915308952332, "epoch": 1.07, "learning_rate": 4.4674556213017755e-05, "loss": 29.6711, "step": 1260, "task_loss": 0.5693647861480713 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27403233852555536, "compression/movement_sparsity/importance_threshold": -0.003849574619942968, "compression/movement_sparsity/linear_layer_sparsity": 0.03797514707745329, "compression/movement_sparsity/model_sparsity": 0.036670583950125756, "compression_loss": 29.578508377075195, "distillation_loss": 0.48726174235343933, "epoch": 1.07, "learning_rate": 4.4670329670329675e-05, "loss": 30.0596, "step": 1261, "task_loss": 0.6845362186431885 }, { "compression/movement_sparsity/importance_regularization_factor": 0.27735174374620586, "compression/movement_sparsity/importance_threshold": -0.003839151589636154, "compression/movement_sparsity/linear_layer_sparsity": 0.039314290723798884, "compression/movement_sparsity/model_sparsity": 0.03796372389253166, "compression_loss": 29.93628692626953, "distillation_loss": 0.4077973961830139, "epoch": 1.07, "learning_rate": 4.466610312764159e-05, "loss": 30.511, "step": 1262, "task_loss": 0.5983346104621887 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2806651518497526, "compression/movement_sparsity/importance_threshold": -0.003828747390453945, "compression/movement_sparsity/linear_layer_sparsity": 0.04042754293845069, "compression/movement_sparsity/model_sparsity": 0.03903873246884578, "compression_loss": 30.29340362548828, "distillation_loss": 0.4096171259880066, "epoch": 1.07, "learning_rate": 4.466187658495351e-05, "loss": 30.8892, "step": 1263, "task_loss": 0.16335059702396393 }, { "compression/movement_sparsity/importance_regularization_factor": 0.28397256825854333, "compression/movement_sparsity/importance_threshold": -0.0038183620053700096, "compression/movement_sparsity/linear_layer_sparsity": 0.04175581174391238, "compression/movement_sparsity/model_sparsity": 0.040321371154607065, "compression_loss": 30.649873733520508, "distillation_loss": 0.6780976057052612, "epoch": 1.07, "learning_rate": 4.4657650042265434e-05, "loss": 31.2155, "step": 1264, "task_loss": 0.8164452314376831 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2872739983949271, "compression/movement_sparsity/importance_threshold": -0.003807995417358012, "compression/movement_sparsity/linear_layer_sparsity": 0.04274527917433629, "compression/movement_sparsity/model_sparsity": 0.04127684733483806, "compression_loss": 31.00570297241211, "distillation_loss": 0.3203093111515045, "epoch": 1.07, "learning_rate": 4.465342349957735e-05, "loss": 31.419, "step": 1265, "task_loss": 0.5225319862365723 }, { "compression/movement_sparsity/importance_regularization_factor": 0.2905694476812517, "compression/movement_sparsity/importance_threshold": -0.00379764760939162, "compression/movement_sparsity/linear_layer_sparsity": 0.044160737493551414, "compression/movement_sparsity/model_sparsity": 0.04264368030632903, "compression_loss": 31.360864639282227, "distillation_loss": 0.7801963686943054, "epoch": 1.07, "learning_rate": 4.4649196956889267e-05, "loss": 32.0025, "step": 1266, "task_loss": 1.1612662076950073 }, { "compression/movement_sparsity/importance_regularization_factor": 0.293858921539865, "compression/movement_sparsity/importance_threshold": -0.003787318564444501, "compression/movement_sparsity/linear_layer_sparsity": 0.04540467858549228, "compression/movement_sparsity/model_sparsity": 0.04384488819495132, "compression_loss": 31.71538734436035, "distillation_loss": 0.44098103046417236, "epoch": 1.07, "learning_rate": 4.4644970414201186e-05, "loss": 32.1975, "step": 1267, "task_loss": 1.3053979873657227 }, { "compression/movement_sparsity/importance_regularization_factor": 0.29714242539311586, "compression/movement_sparsity/importance_threshold": -0.003777008265490321, "compression/movement_sparsity/linear_layer_sparsity": 0.04666427610953903, "compression/movement_sparsity/model_sparsity": 0.045061214669071835, "compression_loss": 32.06924819946289, "distillation_loss": 0.350536584854126, "epoch": 1.07, "learning_rate": 4.46407438715131e-05, "loss": 32.6281, "step": 1268, "task_loss": 0.8032504916191101 }, { "compression/movement_sparsity/importance_regularization_factor": 0.30041996466335225, "compression/movement_sparsity/importance_threshold": -0.003766716695502745, "compression/movement_sparsity/linear_layer_sparsity": 0.04781054634437454, "compression/movement_sparsity/model_sparsity": 0.04616810699500105, "compression_loss": 32.42247772216797, "distillation_loss": 0.459830641746521, "epoch": 1.07, "learning_rate": 4.4636517328825026e-05, "loss": 32.8516, "step": 1269, "task_loss": 0.7863566279411316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3036915447729217, "compression/movement_sparsity/importance_threshold": -0.003756443837455444, "compression/movement_sparsity/linear_layer_sparsity": 0.04906270318781652, "compression/movement_sparsity/model_sparsity": 0.04737724839878577, "compression_loss": 32.775001525878906, "distillation_loss": 0.6473171710968018, "epoch": 1.07, "learning_rate": 4.4632290786136946e-05, "loss": 33.1638, "step": 1270, "task_loss": 0.4224321246147156 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3069571711441735, "compression/movement_sparsity/importance_threshold": -0.0037461896743220804, "compression/movement_sparsity/linear_layer_sparsity": 0.050244245110518906, "compression/movement_sparsity/model_sparsity": 0.04851820072159523, "compression_loss": 33.126888275146484, "distillation_loss": 0.2960829734802246, "epoch": 1.07, "learning_rate": 4.462806424344886e-05, "loss": 33.4904, "step": 1271, "task_loss": 0.5079767107963562 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31021684919945514, "compression/movement_sparsity/importance_threshold": -0.003735954189076323, "compression/movement_sparsity/linear_layer_sparsity": 0.051406565274992294, "compression/movement_sparsity/model_sparsity": 0.04964059161270389, "compression_loss": 33.47807312011719, "distillation_loss": 0.36031287908554077, "epoch": 1.08, "learning_rate": 4.462383770076078e-05, "loss": 33.8437, "step": 1272, "task_loss": 0.8334219455718994 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31347058436111475, "compression/movement_sparsity/importance_threshold": -0.003725737364691839, "compression/movement_sparsity/linear_layer_sparsity": 0.052793012094349355, "compression/movement_sparsity/model_sparsity": 0.05097940971860674, "compression_loss": 33.82857131958008, "distillation_loss": 0.38945573568344116, "epoch": 1.08, "learning_rate": 4.46196111580727e-05, "loss": 34.2726, "step": 1273, "task_loss": 0.9228662252426147 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31671838205150105, "compression/movement_sparsity/importance_threshold": -0.0037155391841422938, "compression/movement_sparsity/linear_layer_sparsity": 0.05416271738234567, "compression/movement_sparsity/model_sparsity": 0.05230206141625405, "compression_loss": 34.178428649902344, "distillation_loss": 1.5171643495559692, "epoch": 1.08, "learning_rate": 4.461538461538462e-05, "loss": 35.3024, "step": 1274, "task_loss": 1.7723408937454224 }, { "compression/movement_sparsity/importance_regularization_factor": 0.31996024769296194, "compression/movement_sparsity/importance_threshold": -0.003705359630401354, "compression/movement_sparsity/linear_layer_sparsity": 0.05539598634425243, "compression/movement_sparsity/model_sparsity": 0.05349296379534023, "compression_loss": 34.527652740478516, "distillation_loss": 0.5163383483886719, "epoch": 1.08, "learning_rate": 4.461115807269654e-05, "loss": 35.4852, "step": 1275, "task_loss": 0.6891390681266785 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32319618670784567, "compression/movement_sparsity/importance_threshold": -0.003695198686442687, "compression/movement_sparsity/linear_layer_sparsity": 0.05650888083387517, "compression/movement_sparsity/model_sparsity": 0.05456762693558051, "compression_loss": 34.87626266479492, "distillation_loss": 1.208616018295288, "epoch": 1.08, "learning_rate": 4.460693153000846e-05, "loss": 35.7273, "step": 1276, "task_loss": 0.8003085851669312 }, { "compression/movement_sparsity/importance_regularization_factor": 0.32642620451849996, "compression/movement_sparsity/importance_threshold": -0.003685056335239961, "compression/movement_sparsity/linear_layer_sparsity": 0.05764483666370566, "compression/movement_sparsity/model_sparsity": 0.05566455918804744, "compression_loss": 35.22423553466797, "distillation_loss": 0.6112419962882996, "epoch": 1.08, "learning_rate": 4.460270498732038e-05, "loss": 35.9297, "step": 1277, "task_loss": 0.41883552074432373 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3296503065472739, "compression/movement_sparsity/importance_threshold": -0.00367493255976684, "compression/movement_sparsity/linear_layer_sparsity": 0.05893800071964737, "compression/movement_sparsity/model_sparsity": 0.056913299080429594, "compression_loss": 35.57155227661133, "distillation_loss": 0.7337345480918884, "epoch": 1.08, "learning_rate": 4.459847844463229e-05, "loss": 36.1913, "step": 1278, "task_loss": 0.759214460849762 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33286849821651504, "compression/movement_sparsity/importance_threshold": -0.003664827342996992, "compression/movement_sparsity/linear_layer_sparsity": 0.06020794842120205, "compression/movement_sparsity/model_sparsity": 0.058139620171619784, "compression_loss": 35.918243408203125, "distillation_loss": 0.5227680206298828, "epoch": 1.08, "learning_rate": 4.459425190194421e-05, "loss": 36.4221, "step": 1279, "task_loss": 0.09014008939266205 }, { "compression/movement_sparsity/importance_regularization_factor": 0.33608078494857196, "compression/movement_sparsity/importance_threshold": -0.0036547406679040837, "compression/movement_sparsity/linear_layer_sparsity": 0.06133783484970588, "compression/movement_sparsity/model_sparsity": 0.059230691525367286, "compression_loss": 36.26424026489258, "distillation_loss": 0.6876983642578125, "epoch": 1.08, "learning_rate": 4.459002535925613e-05, "loss": 36.829, "step": 1280, "task_loss": 1.2024675607681274 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3392871721657921, "compression/movement_sparsity/importance_threshold": -0.003644672517461783, "compression/movement_sparsity/linear_layer_sparsity": 0.0625562224504031, "compression/movement_sparsity/model_sparsity": 0.060407223763781874, "compression_loss": 36.609622955322266, "distillation_loss": 0.3284452259540558, "epoch": 1.08, "learning_rate": 4.458579881656805e-05, "loss": 37.175, "step": 1281, "task_loss": 0.7041627764701843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3424876652905246, "compression/movement_sparsity/importance_threshold": -0.003634622874643754, "compression/movement_sparsity/linear_layer_sparsity": 0.06385704182196703, "compression/movement_sparsity/model_sparsity": 0.061663355988144125, "compression_loss": 36.95436096191406, "distillation_loss": 0.5951570272445679, "epoch": 1.08, "learning_rate": 4.458157227387997e-05, "loss": 37.4481, "step": 1282, "task_loss": 0.9332437515258789 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3456822697451173, "compression/movement_sparsity/importance_threshold": -0.003624591722423665, "compression/movement_sparsity/linear_layer_sparsity": 0.06531987485919943, "compression/movement_sparsity/model_sparsity": 0.06307593621034681, "compression_loss": 37.29840850830078, "distillation_loss": 0.6705347895622253, "epoch": 1.08, "learning_rate": 4.457734573119189e-05, "loss": 37.9679, "step": 1283, "task_loss": 0.5198345184326172 }, { "compression/movement_sparsity/importance_regularization_factor": 0.34887099095191787, "compression/movement_sparsity/importance_threshold": -0.0036145790437751837, "compression/movement_sparsity/linear_layer_sparsity": 0.06659893262482791, "compression/movement_sparsity/model_sparsity": 0.06431105440688402, "compression_loss": 37.64185333251953, "distillation_loss": 0.5819041728973389, "epoch": 1.09, "learning_rate": 4.45731191885038e-05, "loss": 38.0916, "step": 1284, "task_loss": 0.2840628921985626 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3520538343332754, "compression/movement_sparsity/importance_threshold": -0.0036045848216719745, "compression/movement_sparsity/linear_layer_sparsity": 0.06791487184095411, "compression/movement_sparsity/model_sparsity": 0.06558178706263375, "compression_loss": 37.9846305847168, "distillation_loss": 0.39920535683631897, "epoch": 1.09, "learning_rate": 4.456889264581572e-05, "loss": 38.4294, "step": 1285, "task_loss": 0.8727322220802307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.35523080531153767, "compression/movement_sparsity/importance_threshold": -0.0035946090390877056, "compression/movement_sparsity/linear_layer_sparsity": 0.06926108651870778, "compression/movement_sparsity/model_sparsity": 0.06688175512476581, "compression_loss": 38.32673645019531, "distillation_loss": 0.3835964500904083, "epoch": 1.09, "learning_rate": 4.456466610312765e-05, "loss": 38.8715, "step": 1286, "task_loss": 0.44173380732536316 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3584019093090525, "compression/movement_sparsity/importance_threshold": -0.0035846516789960446, "compression/movement_sparsity/linear_layer_sparsity": 0.07057676340314599, "compression/movement_sparsity/model_sparsity": 0.06815223446072806, "compression_loss": 38.6682243347168, "distillation_loss": 0.29448437690734863, "epoch": 1.09, "learning_rate": 4.456043956043957e-05, "loss": 39.3928, "step": 1287, "task_loss": 0.0277054812759161 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3615671517481689, "compression/movement_sparsity/importance_threshold": -0.0035747127243706553, "compression/movement_sparsity/linear_layer_sparsity": 0.07190335090097102, "compression/movement_sparsity/model_sparsity": 0.0694332495969423, "compression_loss": 39.009124755859375, "distillation_loss": 0.5504357218742371, "epoch": 1.09, "learning_rate": 4.455621301775148e-05, "loss": 39.5186, "step": 1288, "task_loss": 1.0629488229751587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3647265380512345, "compression/movement_sparsity/importance_threshold": -0.003564792158185207, "compression/movement_sparsity/linear_layer_sparsity": 0.07328470610074758, "compression/movement_sparsity/model_sparsity": 0.07076715099606089, "compression_loss": 39.34932327270508, "distillation_loss": 0.5402329564094543, "epoch": 1.09, "learning_rate": 4.45519864750634e-05, "loss": 40.0552, "step": 1289, "task_loss": 0.2442416101694107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.36788007364059727, "compression/movement_sparsity/importance_threshold": -0.0035548899634133668, "compression/movement_sparsity/linear_layer_sparsity": 0.07455445109145245, "compression/movement_sparsity/model_sparsity": 0.07199327634014256, "compression_loss": 39.68889236450195, "distillation_loss": 0.3655256927013397, "epoch": 1.09, "learning_rate": 4.454775993237532e-05, "loss": 40.4195, "step": 1290, "task_loss": 0.3499029278755188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37102776393860626, "compression/movement_sparsity/importance_threshold": -0.003545006123028798, "compression/movement_sparsity/linear_layer_sparsity": 0.07616641969330659, "compression/movement_sparsity/model_sparsity": 0.07354986886152756, "compression_loss": 40.02782440185547, "distillation_loss": 0.4868693947792053, "epoch": 1.09, "learning_rate": 4.454353338968724e-05, "loss": 40.4886, "step": 1291, "task_loss": 0.7678642272949219 }, { "compression/movement_sparsity/importance_regularization_factor": 0.37416961436760904, "compression/movement_sparsity/importance_threshold": -0.0035351406200051705, "compression/movement_sparsity/linear_layer_sparsity": 0.07752653795052367, "compression/movement_sparsity/model_sparsity": 0.07486326287239606, "compression_loss": 40.3660774230957, "distillation_loss": 1.2233290672302246, "epoch": 1.09, "learning_rate": 4.453930684699916e-05, "loss": 41.1705, "step": 1292, "task_loss": 1.277454137802124 }, { "compression/movement_sparsity/importance_regularization_factor": 0.377305630349954, "compression/movement_sparsity/importance_threshold": -0.003525293437316149, "compression/movement_sparsity/linear_layer_sparsity": 0.07884109396320411, "compression/movement_sparsity/model_sparsity": 0.07613265984199362, "compression_loss": 40.7037239074707, "distillation_loss": 0.6494832634925842, "epoch": 1.09, "learning_rate": 4.453508030431108e-05, "loss": 41.3183, "step": 1293, "task_loss": 0.46674782037734985 }, { "compression/movement_sparsity/importance_regularization_factor": 0.38043581730798876, "compression/movement_sparsity/importance_threshold": -0.003515464557935404, "compression/movement_sparsity/linear_layer_sparsity": 0.08025448940141823, "compression/movement_sparsity/model_sparsity": 0.07749750079879214, "compression_loss": 41.040740966796875, "distillation_loss": 0.545191764831543, "epoch": 1.09, "learning_rate": 4.453085376162299e-05, "loss": 41.6571, "step": 1294, "task_loss": 0.5185684561729431 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3835601806640625, "compression/movement_sparsity/importance_threshold": -0.003505653964836597, "compression/movement_sparsity/linear_layer_sparsity": 0.08188674101242098, "compression/movement_sparsity/model_sparsity": 0.07907367954556364, "compression_loss": 41.37712097167969, "distillation_loss": 0.5012102723121643, "epoch": 1.09, "learning_rate": 4.452662721893491e-05, "loss": 41.9171, "step": 1295, "task_loss": 0.6159313917160034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3866787258405228, "compression/movement_sparsity/importance_threshold": -0.003495861640993398, "compression/movement_sparsity/linear_layer_sparsity": 0.08340428213076671, "compression/movement_sparsity/model_sparsity": 0.08053908845799168, "compression_loss": 41.712806701660156, "distillation_loss": 0.8001213073730469, "epoch": 1.1, "learning_rate": 4.452240067624683e-05, "loss": 42.3828, "step": 1296, "task_loss": 0.43087005615234375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3897914582597178, "compression/movement_sparsity/importance_threshold": -0.003486087569379474, "compression/movement_sparsity/linear_layer_sparsity": 0.0848317957834617, "compression/movement_sparsity/model_sparsity": 0.08191756262517091, "compression_loss": 42.04793167114258, "distillation_loss": 0.5749796628952026, "epoch": 1.1, "learning_rate": 4.451817413355875e-05, "loss": 42.4919, "step": 1297, "task_loss": 0.44719234108924866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3928983833439961, "compression/movement_sparsity/importance_threshold": -0.003476331732968489, "compression/movement_sparsity/linear_layer_sparsity": 0.086385765233935, "compression/movement_sparsity/model_sparsity": 0.08341814844445126, "compression_loss": 42.382362365722656, "distillation_loss": 0.5764374136924744, "epoch": 1.1, "learning_rate": 4.451394759087067e-05, "loss": 42.7172, "step": 1298, "task_loss": 0.30917418003082275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.39599950651570576, "compression/movement_sparsity/importance_threshold": -0.003466594114734112, "compression/movement_sparsity/linear_layer_sparsity": 0.08800423250715068, "compression/movement_sparsity/model_sparsity": 0.08498101638784429, "compression_loss": 42.71617889404297, "distillation_loss": 1.1756658554077148, "epoch": 1.1, "learning_rate": 4.450972104818259e-05, "loss": 43.5296, "step": 1299, "task_loss": 0.970940887928009 }, { "compression/movement_sparsity/importance_regularization_factor": 0.3990948331971949, "compression/movement_sparsity/importance_threshold": -0.0034568746976500085, "compression/movement_sparsity/linear_layer_sparsity": 0.08962884072669884, "compression/movement_sparsity/model_sparsity": 0.0865498143171715, "compression_loss": 43.049354553222656, "distillation_loss": 0.3907695412635803, "epoch": 1.1, "learning_rate": 4.4505494505494504e-05, "loss": 43.5209, "step": 1300, "task_loss": 0.3283112943172455 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40218436881081115, "compression/movement_sparsity/importance_threshold": -0.003447173464689848, "compression/movement_sparsity/linear_layer_sparsity": 0.09130783507483417, "compression/movement_sparsity/model_sparsity": 0.08817113004425761, "compression_loss": 43.38188552856445, "distillation_loss": 0.5744103193283081, "epoch": 1.1, "learning_rate": 4.4501267962806424e-05, "loss": 43.887, "step": 1301, "task_loss": 0.4608490765094757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40526811877890373, "compression/movement_sparsity/importance_threshold": -0.003437490398827293, "compression/movement_sparsity/linear_layer_sparsity": 0.09294863631403183, "compression/movement_sparsity/model_sparsity": 0.0897555647131938, "compression_loss": 43.713741302490234, "distillation_loss": 0.7757353782653809, "epoch": 1.1, "learning_rate": 4.449704142011834e-05, "loss": 44.3188, "step": 1302, "task_loss": 1.9043248891830444 }, { "compression/movement_sparsity/importance_regularization_factor": 0.40834608852382054, "compression/movement_sparsity/importance_threshold": -0.003427825483036012, "compression/movement_sparsity/linear_layer_sparsity": 0.09464021065902299, "compression/movement_sparsity/model_sparsity": 0.09138902827554317, "compression_loss": 44.044921875, "distillation_loss": 0.5997978448867798, "epoch": 1.1, "learning_rate": 4.449281487743027e-05, "loss": 44.5606, "step": 1303, "task_loss": 0.23925459384918213 }, { "compression/movement_sparsity/importance_regularization_factor": 0.411418283467909, "compression/movement_sparsity/importance_threshold": -0.0034181787002896744, "compression/movement_sparsity/linear_layer_sparsity": 0.09646436982395731, "compression/movement_sparsity/model_sparsity": 0.09315052196139202, "compression_loss": 44.37547302246094, "distillation_loss": 0.5133511424064636, "epoch": 1.1, "learning_rate": 4.448858833474218e-05, "loss": 44.9213, "step": 1304, "task_loss": 1.0616811513900757 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4144847090335183, "compression/movement_sparsity/importance_threshold": -0.0034085500335619425, "compression/movement_sparsity/linear_layer_sparsity": 0.09818016215341692, "compression/movement_sparsity/model_sparsity": 0.09480737154594007, "compression_loss": 44.70539093017578, "distillation_loss": 0.34925776720046997, "epoch": 1.1, "learning_rate": 4.44843617920541e-05, "loss": 45.2259, "step": 1305, "task_loss": 0.8480327725410461 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4175453706429964, "compression/movement_sparsity/importance_threshold": -0.003398939465826485, "compression/movement_sparsity/linear_layer_sparsity": 0.09994365115341997, "compression/movement_sparsity/model_sparsity": 0.0965102792736663, "compression_loss": 45.034645080566406, "distillation_loss": 0.27559226751327515, "epoch": 1.1, "learning_rate": 4.448013524936602e-05, "loss": 45.3684, "step": 1306, "task_loss": 0.6624144911766052 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4206002737186907, "compression/movement_sparsity/importance_threshold": -0.0033893469800569706, "compression/movement_sparsity/linear_layer_sparsity": 0.10182999485257531, "compression/movement_sparsity/model_sparsity": 0.09833182126368369, "compression_loss": 45.36323928833008, "distillation_loss": 0.6117426156997681, "epoch": 1.1, "learning_rate": 4.4475908706677935e-05, "loss": 46.0391, "step": 1307, "task_loss": 1.6863586902618408 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42364942368295055, "compression/movement_sparsity/importance_threshold": -0.0033797725592270623, "compression/movement_sparsity/linear_layer_sparsity": 0.10368620618011484, "compression/movement_sparsity/model_sparsity": 0.10012426602174826, "compression_loss": 45.691219329833984, "distillation_loss": 0.3198488652706146, "epoch": 1.11, "learning_rate": 4.447168216398986e-05, "loss": 46.1917, "step": 1308, "task_loss": 0.4496100842952728 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42669282595812374, "compression/movement_sparsity/importance_threshold": -0.0033702161863104286, "compression/movement_sparsity/linear_layer_sparsity": 0.10527419528085326, "compression/movement_sparsity/model_sparsity": 0.10165770281165044, "compression_loss": 46.01858139038086, "distillation_loss": 0.38542452454566956, "epoch": 1.11, "learning_rate": 4.446745562130178e-05, "loss": 46.554, "step": 1309, "task_loss": 0.3755490481853485 }, { "compression/movement_sparsity/importance_regularization_factor": 0.42973048596655783, "compression/movement_sparsity/importance_threshold": -0.0033606778442807376, "compression/movement_sparsity/linear_layer_sparsity": 0.10724181410661465, "compression/movement_sparsity/model_sparsity": 0.1035577278776434, "compression_loss": 46.345279693603516, "distillation_loss": 0.6864478588104248, "epoch": 1.11, "learning_rate": 4.4463229078613694e-05, "loss": 46.9479, "step": 1310, "task_loss": 0.6452550888061523 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4327624091306019, "compression/movement_sparsity/importance_threshold": -0.0033511575161116534, "compression/movement_sparsity/linear_layer_sparsity": 0.10911438539215058, "compression/movement_sparsity/model_sparsity": 0.1053659705788181, "compression_loss": 46.671348571777344, "distillation_loss": 0.39784860610961914, "epoch": 1.11, "learning_rate": 4.4459002535925614e-05, "loss": 47.2376, "step": 1311, "task_loss": 0.6274633407592773 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4357886008726038, "compression/movement_sparsity/importance_threshold": -0.003341655184776844, "compression/movement_sparsity/linear_layer_sparsity": 0.11101962889700877, "compression/movement_sparsity/model_sparsity": 0.10720576310806983, "compression_loss": 46.99675750732422, "distillation_loss": 0.4980035126209259, "epoch": 1.11, "learning_rate": 4.4454775993237534e-05, "loss": 47.4028, "step": 1312, "task_loss": 0.6609987616539001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4388090666149118, "compression/movement_sparsity/importance_threshold": -0.0033321708332499757, "compression/movement_sparsity/linear_layer_sparsity": 0.11290673574289281, "compression/movement_sparsity/model_sparsity": 0.10902804202837806, "compression_loss": 47.32157516479492, "distillation_loss": 0.6137720346450806, "epoch": 1.11, "learning_rate": 4.445054945054945e-05, "loss": 47.7557, "step": 1313, "task_loss": 1.381227731704712 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4418238117798735, "compression/movement_sparsity/importance_threshold": -0.003322704444504718, "compression/movement_sparsity/linear_layer_sparsity": 0.1147247658856623, "compression/movement_sparsity/model_sparsity": 0.11078361724282851, "compression_loss": 47.64578628540039, "distillation_loss": 0.7195747494697571, "epoch": 1.11, "learning_rate": 4.4446322907861373e-05, "loss": 48.2647, "step": 1314, "task_loss": 0.5013588666915894 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44483284178983795, "compression/movement_sparsity/importance_threshold": -0.0033132560015147333, "compression/movement_sparsity/linear_layer_sparsity": 0.11677177581954327, "compression/movement_sparsity/model_sparsity": 0.11276030608814154, "compression_loss": 47.9693489074707, "distillation_loss": 0.4741535484790802, "epoch": 1.11, "learning_rate": 4.444209636517329e-05, "loss": 48.4193, "step": 1315, "task_loss": 1.341664433479309 }, { "compression/movement_sparsity/importance_regularization_factor": 0.44783616206715304, "compression/movement_sparsity/importance_threshold": -0.0033038254872536903, "compression/movement_sparsity/linear_layer_sparsity": 0.11879701222332115, "compression/movement_sparsity/model_sparsity": 0.11471596939109373, "compression_loss": 48.292205810546875, "distillation_loss": 0.4550090730190277, "epoch": 1.11, "learning_rate": 4.443786982248521e-05, "loss": 48.7506, "step": 1316, "task_loss": 0.25337034463882446 }, { "compression/movement_sparsity/importance_regularization_factor": 0.450833778034167, "compression/movement_sparsity/importance_threshold": -0.0032944128846952555, "compression/movement_sparsity/linear_layer_sparsity": 0.12061949008061883, "compression/movement_sparsity/model_sparsity": 0.11647583952739556, "compression_loss": 48.61443328857422, "distillation_loss": 0.5711060762405396, "epoch": 1.11, "learning_rate": 4.4433643279797126e-05, "loss": 49.2495, "step": 1317, "task_loss": 0.4683636724948883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.45382569511322746, "compression/movement_sparsity/importance_threshold": -0.003285018176813097, "compression/movement_sparsity/linear_layer_sparsity": 0.1226937348133801, "compression/movement_sparsity/model_sparsity": 0.11847882757246331, "compression_loss": 48.9360466003418, "distillation_loss": 0.9420948624610901, "epoch": 1.11, "learning_rate": 4.4429416737109046e-05, "loss": 49.7345, "step": 1318, "task_loss": 0.6032072305679321 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4568119187266834, "compression/movement_sparsity/importance_threshold": -0.0032756413465808796, "compression/movement_sparsity/linear_layer_sparsity": 0.12462016756412722, "compression/movement_sparsity/model_sparsity": 0.12033908143182193, "compression_loss": 49.2570686340332, "distillation_loss": 0.5850843191146851, "epoch": 1.11, "learning_rate": 4.4425190194420965e-05, "loss": 49.8058, "step": 1319, "task_loss": 0.4643186330795288 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4597924542968828, "compression/movement_sparsity/importance_threshold": -0.003266282376972271, "compression/movement_sparsity/linear_layer_sparsity": 0.12671122537325508, "compression/movement_sparsity/model_sparsity": 0.12235830497235999, "compression_loss": 49.57742691040039, "distillation_loss": 0.6551699638366699, "epoch": 1.12, "learning_rate": 4.4420963651732885e-05, "loss": 50.3427, "step": 1320, "task_loss": 1.0393555164337158 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46276730724617354, "compression/movement_sparsity/importance_threshold": -0.003256941250960938, "compression/movement_sparsity/linear_layer_sparsity": 0.12876782233791526, "compression/movement_sparsity/model_sparsity": 0.12434425150445183, "compression_loss": 49.89724349975586, "distillation_loss": 0.5629104971885681, "epoch": 1.12, "learning_rate": 4.4416737109044805e-05, "loss": 50.5745, "step": 1321, "task_loss": 0.513153612613678 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4657364829969042, "compression/movement_sparsity/importance_threshold": -0.003247617951520546, "compression/movement_sparsity/linear_layer_sparsity": 0.1308728075748418, "compression/movement_sparsity/model_sparsity": 0.1263769240227979, "compression_loss": 50.21639633178711, "distillation_loss": 0.5485292673110962, "epoch": 1.12, "learning_rate": 4.4412510566356725e-05, "loss": 50.9856, "step": 1322, "task_loss": 1.4749886989593506 }, { "compression/movement_sparsity/importance_regularization_factor": 0.46869998697142295, "compression/movement_sparsity/importance_threshold": -0.003238312461624763, "compression/movement_sparsity/linear_layer_sparsity": 0.13266208854944123, "compression/movement_sparsity/model_sparsity": 0.12810473769144773, "compression_loss": 50.534996032714844, "distillation_loss": 0.868882417678833, "epoch": 1.12, "learning_rate": 4.440828402366864e-05, "loss": 51.2033, "step": 1323, "task_loss": 0.9010410904884338 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4716578245920773, "compression/movement_sparsity/importance_threshold": -0.003229024764247257, "compression/movement_sparsity/linear_layer_sparsity": 0.13473499777542727, "compression/movement_sparsity/model_sparsity": 0.1301064361085065, "compression_loss": 50.852928161621094, "distillation_loss": 0.30092743039131165, "epoch": 1.12, "learning_rate": 4.440405748098056e-05, "loss": 51.2459, "step": 1324, "task_loss": 0.7190616726875305 }, { "compression/movement_sparsity/importance_regularization_factor": 0.47461000128121655, "compression/movement_sparsity/importance_threshold": -0.003219754842361691, "compression/movement_sparsity/linear_layer_sparsity": 0.13662842443015832, "compression/movement_sparsity/model_sparsity": 0.13193481773278584, "compression_loss": 51.170188903808594, "distillation_loss": 0.9655022621154785, "epoch": 1.12, "learning_rate": 4.4399830938292484e-05, "loss": 51.8331, "step": 1325, "task_loss": 0.8328261375427246 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4775565224611882, "compression/movement_sparsity/importance_threshold": -0.003210502678941735, "compression/movement_sparsity/linear_layer_sparsity": 0.13875555284638466, "compression/movement_sparsity/model_sparsity": 0.13398887274410237, "compression_loss": 51.48688507080078, "distillation_loss": 0.5974711775779724, "epoch": 1.12, "learning_rate": 4.43956043956044e-05, "loss": 52.1163, "step": 1326, "task_loss": 0.6383416056632996 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48049739355434024, "compression/movement_sparsity/importance_threshold": -0.003201268256961055, "compression/movement_sparsity/linear_layer_sparsity": 0.14075847913251585, "compression/movement_sparsity/model_sparsity": 0.13592299235058297, "compression_loss": 51.80292892456055, "distillation_loss": 1.082674503326416, "epoch": 1.12, "learning_rate": 4.4391377852916316e-05, "loss": 52.5813, "step": 1327, "task_loss": 0.6380428075790405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4834326199830216, "compression/movement_sparsity/importance_threshold": -0.003192051559393317, "compression/movement_sparsity/linear_layer_sparsity": 0.14276274092542224, "compression/movement_sparsity/model_sparsity": 0.13785840158507257, "compression_loss": 52.11836624145508, "distillation_loss": 0.5320615172386169, "epoch": 1.12, "learning_rate": 4.4387151310228236e-05, "loss": 52.6443, "step": 1328, "task_loss": 0.32004961371421814 }, { "compression/movement_sparsity/importance_regularization_factor": 0.48636220716958023, "compression/movement_sparsity/importance_threshold": -0.003182852569212187, "compression/movement_sparsity/linear_layer_sparsity": 0.14468940023535445, "compression/movement_sparsity/model_sparsity": 0.1397188742206113, "compression_loss": 52.43320083618164, "distillation_loss": 0.8700262904167175, "epoch": 1.12, "learning_rate": 4.438292476754015e-05, "loss": 53.2182, "step": 1329, "task_loss": 0.2888091504573822 }, { "compression/movement_sparsity/importance_regularization_factor": 0.489286160536364, "compression/movement_sparsity/importance_threshold": -0.0031736712693913333, "compression/movement_sparsity/linear_layer_sparsity": 0.14658221875753608, "compression/movement_sparsity/model_sparsity": 0.1415466686035651, "compression_loss": 52.74740219116211, "distillation_loss": 0.33761823177337646, "epoch": 1.12, "learning_rate": 4.437869822485207e-05, "loss": 53.3991, "step": 1330, "task_loss": 0.8333884477615356 }, { "compression/movement_sparsity/importance_regularization_factor": 0.49220448550572093, "compression/movement_sparsity/importance_threshold": -0.003164507642904423, "compression/movement_sparsity/linear_layer_sparsity": 0.14862351701511947, "compression/movement_sparsity/model_sparsity": 0.14351784198623255, "compression_loss": 53.060997009277344, "distillation_loss": 0.7438492178916931, "epoch": 1.13, "learning_rate": 4.4374471682163995e-05, "loss": 53.6375, "step": 1331, "task_loss": 1.1630669832229614 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4951171875, "compression/movement_sparsity/importance_threshold": -0.003155361672725121, "compression/movement_sparsity/linear_layer_sparsity": 0.15089827855084542, "compression/movement_sparsity/model_sparsity": 0.1457144584652213, "compression_loss": 53.37394714355469, "distillation_loss": 0.7777900695800781, "epoch": 1.13, "learning_rate": 4.4370245139475915e-05, "loss": 54.312, "step": 1332, "task_loss": 1.1477227210998535 }, { "compression/movement_sparsity/importance_regularization_factor": 0.4980242719415489, "compression/movement_sparsity/importance_threshold": -0.0031462333418270953, "compression/movement_sparsity/linear_layer_sparsity": 0.1530567436796188, "compression/movement_sparsity/model_sparsity": 0.1477987736766059, "compression_loss": 53.68632507324219, "distillation_loss": 0.3847755789756775, "epoch": 1.13, "learning_rate": 4.436601859678783e-05, "loss": 54.2379, "step": 1333, "task_loss": 1.5904806852340698 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5009257442527161, "compression/movement_sparsity/importance_threshold": -0.0031371226331840113, "compression/movement_sparsity/linear_layer_sparsity": 0.15496221374366206, "compression/movement_sparsity/model_sparsity": 0.14963878498203773, "compression_loss": 53.998050689697266, "distillation_loss": 0.34787118434906006, "epoch": 1.13, "learning_rate": 4.436179205409975e-05, "loss": 54.5374, "step": 1334, "task_loss": 0.7878853678703308 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5038216098558489, "compression/movement_sparsity/importance_threshold": -0.003128029529769539, "compression/movement_sparsity/linear_layer_sparsity": 0.1571680178179498, "compression/movement_sparsity/model_sparsity": 0.15176881290052666, "compression_loss": 54.309173583984375, "distillation_loss": 0.9435323476791382, "epoch": 1.13, "learning_rate": 4.435756551141167e-05, "loss": 55.0187, "step": 1335, "task_loss": 0.3080785572528839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5067118741732968, "compression/movement_sparsity/importance_threshold": -0.0031189540145573407, "compression/movement_sparsity/linear_layer_sparsity": 0.15907311823279638, "compression/movement_sparsity/model_sparsity": 0.15360846725534885, "compression_loss": 54.619693756103516, "distillation_loss": 0.4913204610347748, "epoch": 1.13, "learning_rate": 4.435333896872359e-05, "loss": 55.2671, "step": 1336, "task_loss": 0.5756125450134277 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5095965426274073, "compression/movement_sparsity/importance_threshold": -0.003109896070521086, "compression/movement_sparsity/linear_layer_sparsity": 0.1613855005174135, "compression/movement_sparsity/model_sparsity": 0.15584141209476937, "compression_loss": 54.929561614990234, "distillation_loss": 0.6135482788085938, "epoch": 1.13, "learning_rate": 4.434911242603551e-05, "loss": 55.6303, "step": 1337, "task_loss": 0.9360067248344421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5124756206405283, "compression/movement_sparsity/importance_threshold": -0.0031008556806344416, "compression/movement_sparsity/linear_layer_sparsity": 0.16348936488258226, "compression/movement_sparsity/model_sparsity": 0.15787300224675077, "compression_loss": 55.23884582519531, "distillation_loss": 0.6394940614700317, "epoch": 1.13, "learning_rate": 4.434488588334743e-05, "loss": 55.84, "step": 1338, "task_loss": 1.604577660560608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5153491136350088, "compression/movement_sparsity/importance_threshold": -0.0030918328278710726, "compression/movement_sparsity/linear_layer_sparsity": 0.1656997239889069, "compression/movement_sparsity/model_sparsity": 0.1600074287179132, "compression_loss": 55.54752731323242, "distillation_loss": 1.371919870376587, "epoch": 1.13, "learning_rate": 4.434065934065934e-05, "loss": 56.3289, "step": 1339, "task_loss": 1.1545560359954834 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5182170270331966, "compression/movement_sparsity/importance_threshold": -0.003082827495204646, "compression/movement_sparsity/linear_layer_sparsity": 0.16785873762939152, "compression/movement_sparsity/model_sparsity": 0.16209227359794434, "compression_loss": 55.855594635009766, "distillation_loss": 0.6714638471603394, "epoch": 1.13, "learning_rate": 4.433643279797126e-05, "loss": 56.4418, "step": 1340, "task_loss": 1.5307416915893555 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5210793662574396, "compression/movement_sparsity/importance_threshold": -0.0030738396656088307, "compression/movement_sparsity/linear_layer_sparsity": 0.17007337751189747, "compression/movement_sparsity/model_sparsity": 0.16423083378745704, "compression_loss": 56.163063049316406, "distillation_loss": 0.45750027894973755, "epoch": 1.13, "learning_rate": 4.433220625528318e-05, "loss": 56.8211, "step": 1341, "task_loss": 0.32405903935432434 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5239361367300863, "compression/movement_sparsity/importance_threshold": -0.0030648693220572906, "compression/movement_sparsity/linear_layer_sparsity": 0.1722077915945493, "compression/movement_sparsity/model_sparsity": 0.16629192418014405, "compression_loss": 56.469905853271484, "distillation_loss": 0.2561332583427429, "epoch": 1.13, "learning_rate": 4.43279797125951e-05, "loss": 57.3984, "step": 1342, "task_loss": 0.03715972602367401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5267873438734849, "compression/movement_sparsity/importance_threshold": -0.003055916447523694, "compression/movement_sparsity/linear_layer_sparsity": 0.1742794487829336, "compression/movement_sparsity/model_sparsity": 0.16829241357094438, "compression_loss": 56.77617263793945, "distillation_loss": 0.5091273784637451, "epoch": 1.14, "learning_rate": 4.432375316990702e-05, "loss": 57.3105, "step": 1343, "task_loss": 0.34252116084098816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5296329931099835, "compression/movement_sparsity/importance_threshold": -0.003046981024981708, "compression/movement_sparsity/linear_layer_sparsity": 0.17643600604488524, "compression/movement_sparsity/model_sparsity": 0.17037488645660184, "compression_loss": 57.081825256347656, "distillation_loss": 0.5197078585624695, "epoch": 1.14, "learning_rate": 4.431952662721894e-05, "loss": 57.726, "step": 1344, "task_loss": 0.37829092144966125 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5324730898619304, "compression/movement_sparsity/importance_threshold": -0.003038063037404998, "compression/movement_sparsity/linear_layer_sparsity": 0.17847503871061782, "compression/movement_sparsity/model_sparsity": 0.17234387207746832, "compression_loss": 57.38687515258789, "distillation_loss": 0.37463319301605225, "epoch": 1.14, "learning_rate": 4.431530008453086e-05, "loss": 57.8522, "step": 1345, "task_loss": 1.0771381855010986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5353076395516739, "compression/movement_sparsity/importance_threshold": -0.003029162467767231, "compression/movement_sparsity/linear_layer_sparsity": 0.18065012612907558, "compression/movement_sparsity/model_sparsity": 0.1744442385517505, "compression_loss": 57.6912841796875, "distillation_loss": 0.6838735938072205, "epoch": 1.14, "learning_rate": 4.431107354184277e-05, "loss": 58.6726, "step": 1346, "task_loss": 0.6715477108955383 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5381366476015617, "compression/movement_sparsity/importance_threshold": -0.0030202792990420752, "compression/movement_sparsity/linear_layer_sparsity": 0.18289939379439607, "compression/movement_sparsity/model_sparsity": 0.17661623695321052, "compression_loss": 57.995113372802734, "distillation_loss": 0.5443418622016907, "epoch": 1.14, "learning_rate": 4.430684699915469e-05, "loss": 58.6149, "step": 1347, "task_loss": 0.4529528021812439 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5409601194339426, "compression/movement_sparsity/importance_threshold": -0.003011413514203195, "compression/movement_sparsity/linear_layer_sparsity": 0.18485191662393044, "compression/movement_sparsity/model_sparsity": 0.17850168461688762, "compression_loss": 58.298362731933594, "distillation_loss": 0.3564160466194153, "epoch": 1.14, "learning_rate": 4.430262045646662e-05, "loss": 58.8935, "step": 1348, "task_loss": 0.7282147407531738 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5437780604711646, "compression/movement_sparsity/importance_threshold": -0.003002565096224258, "compression/movement_sparsity/linear_layer_sparsity": 0.18723356639834426, "compression/movement_sparsity/model_sparsity": 0.18080151739473863, "compression_loss": 58.60100173950195, "distillation_loss": 0.4090028703212738, "epoch": 1.14, "learning_rate": 4.429839391377853e-05, "loss": 59.1323, "step": 1349, "task_loss": 0.05018392950296402 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5465904761355758, "compression/movement_sparsity/importance_threshold": -0.0029937340280789316, "compression/movement_sparsity/linear_layer_sparsity": 0.1895519107667793, "compression/movement_sparsity/model_sparsity": 0.18304021950205643, "compression_loss": 58.90302658081055, "distillation_loss": 0.6254972815513611, "epoch": 1.14, "learning_rate": 4.429416737109045e-05, "loss": 59.6259, "step": 1350, "task_loss": 1.0108393430709839 }, { "compression/movement_sparsity/importance_regularization_factor": 0.549397371849524, "compression/movement_sparsity/importance_threshold": -0.002984920292740883, "compression/movement_sparsity/linear_layer_sparsity": 0.19198836401564756, "compression/movement_sparsity/model_sparsity": 0.18539297308641917, "compression_loss": 59.204490661621094, "distillation_loss": 0.38609322905540466, "epoch": 1.14, "learning_rate": 4.428994082840237e-05, "loss": 59.99, "step": 1351, "task_loss": 0.8283719420433044 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5521987530353583, "compression/movement_sparsity/importance_threshold": -0.0029761238731837772, "compression/movement_sparsity/linear_layer_sparsity": 0.19427081663949863, "compression/movement_sparsity/model_sparsity": 0.18759701644099538, "compression_loss": 59.505332946777344, "distillation_loss": 0.6423802375793457, "epoch": 1.14, "learning_rate": 4.428571428571428e-05, "loss": 60.2511, "step": 1352, "task_loss": 0.8454609513282776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5549946251154263, "compression/movement_sparsity/importance_threshold": -0.002967344752381282, "compression/movement_sparsity/linear_layer_sparsity": 0.19643606661965682, "compression/movement_sparsity/model_sparsity": 0.18968788342324708, "compression_loss": 59.805545806884766, "distillation_loss": 0.7758719325065613, "epoch": 1.14, "learning_rate": 4.428148774302621e-05, "loss": 60.5515, "step": 1353, "task_loss": 0.945686936378479 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5577849935120762, "compression/movement_sparsity/importance_threshold": -0.002958582913307064, "compression/movement_sparsity/linear_layer_sparsity": 0.19867671311177657, "compression/movement_sparsity/model_sparsity": 0.19185155681532765, "compression_loss": 60.10515594482422, "distillation_loss": 0.69838947057724, "epoch": 1.14, "learning_rate": 4.427726120033813e-05, "loss": 60.8867, "step": 1354, "task_loss": 0.34210434556007385 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5605698636476562, "compression/movement_sparsity/importance_threshold": -0.0029498383389347906, "compression/movement_sparsity/linear_layer_sparsity": 0.2010280755003953, "compression/movement_sparsity/model_sparsity": 0.19412214267226052, "compression_loss": 60.40419387817383, "distillation_loss": 1.1966954469680786, "epoch": 1.15, "learning_rate": 4.427303465765004e-05, "loss": 61.2657, "step": 1355, "task_loss": 0.6600598096847534 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5633492409445147, "compression/movement_sparsity/importance_threshold": -0.002941111012238127, "compression/movement_sparsity/linear_layer_sparsity": 0.2033781262305741, "compression/movement_sparsity/model_sparsity": 0.19639146193025603, "compression_loss": 60.70262908935547, "distillation_loss": 0.7910445928573608, "epoch": 1.15, "learning_rate": 4.426880811496196e-05, "loss": 61.6675, "step": 1356, "task_loss": 0.6780710220336914 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5661231308249999, "compression/movement_sparsity/importance_threshold": -0.00293240091619074, "compression/movement_sparsity/linear_layer_sparsity": 0.20531577962306657, "compression/movement_sparsity/model_sparsity": 0.1982625509677973, "compression_loss": 61.00050354003906, "distillation_loss": 0.5539929866790771, "epoch": 1.15, "learning_rate": 4.426458157227388e-05, "loss": 61.6449, "step": 1357, "task_loss": 0.31943202018737793 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5688915387114595, "compression/movement_sparsity/importance_threshold": -0.0029237080337662985, "compression/movement_sparsity/linear_layer_sparsity": 0.20764713325839232, "compression/movement_sparsity/model_sparsity": 0.20051381543366695, "compression_loss": 61.297794342041016, "distillation_loss": 0.8837694525718689, "epoch": 1.15, "learning_rate": 4.42603550295858e-05, "loss": 62.0661, "step": 1358, "task_loss": 1.372668743133545 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5716544700262425, "compression/movement_sparsity/importance_threshold": -0.002915032347938466, "compression/movement_sparsity/linear_layer_sparsity": 0.20992387420594583, "compression/movement_sparsity/model_sparsity": 0.2027123433255976, "compression_loss": 61.594505310058594, "distillation_loss": 0.8430509567260742, "epoch": 1.15, "learning_rate": 4.425612848689772e-05, "loss": 62.337, "step": 1359, "task_loss": 0.5275387167930603 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5744119301916966, "compression/movement_sparsity/importance_threshold": -0.002906373841680912, "compression/movement_sparsity/linear_layer_sparsity": 0.21205207579725938, "compression/movement_sparsity/model_sparsity": 0.20476743464513567, "compression_loss": 61.890655517578125, "distillation_loss": 0.8673979640007019, "epoch": 1.15, "learning_rate": 4.425190194420964e-05, "loss": 62.8152, "step": 1360, "task_loss": 0.978115975856781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5771639246301696, "compression/movement_sparsity/importance_threshold": -0.002897732497967303, "compression/movement_sparsity/linear_layer_sparsity": 0.21415770493923825, "compression/movement_sparsity/model_sparsity": 0.20680072894841464, "compression_loss": 62.18621063232422, "distillation_loss": 0.9977774620056152, "epoch": 1.15, "learning_rate": 4.424767540152156e-05, "loss": 63.0701, "step": 1361, "task_loss": 1.0909597873687744 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5799104587640107, "compression/movement_sparsity/importance_threshold": -0.002889108299771303, "compression/movement_sparsity/linear_layer_sparsity": 0.21637436000607466, "compression/movement_sparsity/model_sparsity": 0.2089412350944766, "compression_loss": 62.481117248535156, "distillation_loss": 0.8493362665176392, "epoch": 1.15, "learning_rate": 4.4243448858833473e-05, "loss": 63.2991, "step": 1362, "task_loss": 0.7382258772850037 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5826515380155675, "compression/movement_sparsity/importance_threshold": -0.0028805012300665814, "compression/movement_sparsity/linear_layer_sparsity": 0.21853450644248468, "compression/movement_sparsity/model_sparsity": 0.21102717385540823, "compression_loss": 62.77546691894531, "distillation_loss": 1.1857895851135254, "epoch": 1.15, "learning_rate": 4.423922231614539e-05, "loss": 63.5875, "step": 1363, "task_loss": 0.5139634013175964 }, { "compression/movement_sparsity/importance_regularization_factor": 0.585387167807188, "compression/movement_sparsity/importance_threshold": -0.002871911271826805, "compression/movement_sparsity/linear_layer_sparsity": 0.2205499411804659, "compression/movement_sparsity/model_sparsity": 0.2129733722099373, "compression_loss": 63.069217681884766, "distillation_loss": 0.9533974528312683, "epoch": 1.15, "learning_rate": 4.423499577345731e-05, "loss": 63.9281, "step": 1364, "task_loss": 1.2177783250808716 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5881173535612209, "compression/movement_sparsity/importance_threshold": -0.0028633384080256384, "compression/movement_sparsity/linear_layer_sparsity": 0.2226018519472452, "compression/movement_sparsity/model_sparsity": 0.21495479352946187, "compression_loss": 63.36244583129883, "distillation_loss": 0.601244330406189, "epoch": 1.15, "learning_rate": 4.423076923076923e-05, "loss": 64.0464, "step": 1365, "task_loss": 1.0718969106674194 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5908421007000141, "compression/movement_sparsity/importance_threshold": -0.00285478262163675, "compression/movement_sparsity/linear_layer_sparsity": 0.22477162118693736, "compression/movement_sparsity/model_sparsity": 0.2170500245207797, "compression_loss": 63.65501403808594, "distillation_loss": 0.6122133135795593, "epoch": 1.15, "learning_rate": 4.422654268808115e-05, "loss": 64.4711, "step": 1366, "task_loss": 0.8908010125160217 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5935614146459156, "compression/movement_sparsity/importance_threshold": -0.0028462438956338067, "compression/movement_sparsity/linear_layer_sparsity": 0.22698790660457704, "compression/movement_sparsity/model_sparsity": 0.21919017371623203, "compression_loss": 63.94700241088867, "distillation_loss": 0.7002647519111633, "epoch": 1.16, "learning_rate": 4.422231614539307e-05, "loss": 64.7485, "step": 1367, "task_loss": 0.7219159007072449 }, { "compression/movement_sparsity/importance_regularization_factor": 0.596275300821274, "compression/movement_sparsity/importance_threshold": -0.002837722212990474, "compression/movement_sparsity/linear_layer_sparsity": 0.22913723782094136, "compression/movement_sparsity/model_sparsity": 0.221265668793198, "compression_loss": 64.23841094970703, "distillation_loss": 0.7693737149238586, "epoch": 1.16, "learning_rate": 4.4218089602704985e-05, "loss": 65.142, "step": 1368, "task_loss": 0.6063172221183777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.5989837646484375, "compression/movement_sparsity/importance_threshold": -0.002829217556680419, "compression/movement_sparsity/linear_layer_sparsity": 0.23141529042693482, "compression/movement_sparsity/model_sparsity": 0.22346546328406605, "compression_loss": 64.52925109863281, "distillation_loss": 0.6864047050476074, "epoch": 1.16, "learning_rate": 4.4213863060016905e-05, "loss": 65.4941, "step": 1369, "task_loss": 0.6815376281738281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6016868115497541, "compression/movement_sparsity/importance_threshold": -0.0028207299096773084, "compression/movement_sparsity/linear_layer_sparsity": 0.2337316553835423, "compression/movement_sparsity/model_sparsity": 0.22570225397844196, "compression_loss": 64.81948852539062, "distillation_loss": 0.6462836265563965, "epoch": 1.16, "learning_rate": 4.420963651732883e-05, "loss": 65.8191, "step": 1370, "task_loss": 1.1219321489334106 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6043844469475719, "compression/movement_sparsity/importance_threshold": -0.00281225925495481, "compression/movement_sparsity/linear_layer_sparsity": 0.23573174371777614, "compression/movement_sparsity/model_sparsity": 0.22763363312540347, "compression_loss": 65.109130859375, "distillation_loss": 1.332759976387024, "epoch": 1.16, "learning_rate": 4.4205409974640744e-05, "loss": 66.0725, "step": 1371, "task_loss": 1.4456849098205566 }, { "compression/movement_sparsity/importance_regularization_factor": 0.607076676264239, "compression/movement_sparsity/importance_threshold": -0.00280380557548659, "compression/movement_sparsity/linear_layer_sparsity": 0.23804602194504734, "compression/movement_sparsity/model_sparsity": 0.22986840877601533, "compression_loss": 65.39823150634766, "distillation_loss": 1.0189800262451172, "epoch": 1.16, "learning_rate": 4.4201183431952664e-05, "loss": 66.222, "step": 1372, "task_loss": 0.9191982746124268 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6097635049221041, "compression/movement_sparsity/importance_threshold": -0.0027953688542463138, "compression/movement_sparsity/linear_layer_sparsity": 0.24014831232008815, "compression/movement_sparsity/model_sparsity": 0.23189847900927185, "compression_loss": 65.68670654296875, "distillation_loss": 0.9639915227890015, "epoch": 1.16, "learning_rate": 4.4196956889264584e-05, "loss": 66.4569, "step": 1373, "task_loss": 1.041630506515503 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6124449383435151, "compression/movement_sparsity/importance_threshold": -0.00278694907420765, "compression/movement_sparsity/linear_layer_sparsity": 0.24250219070807805, "compression/movement_sparsity/model_sparsity": 0.23417149443325738, "compression_loss": 65.97455596923828, "distillation_loss": 1.028700828552246, "epoch": 1.16, "learning_rate": 4.4192730346576503e-05, "loss": 66.8127, "step": 1374, "task_loss": 1.1086666584014893 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6151209819508199, "compression/movement_sparsity/importance_threshold": -0.0027785462183442643, "compression/movement_sparsity/linear_layer_sparsity": 0.2447815668966791, "compression/movement_sparsity/model_sparsity": 0.2363725670375986, "compression_loss": 66.2618408203125, "distillation_loss": 0.7350916862487793, "epoch": 1.16, "learning_rate": 4.418850380388842e-05, "loss": 66.8838, "step": 1375, "task_loss": 0.8811807632446289 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6177916411663673, "compression/movement_sparsity/importance_threshold": -0.0027701602696298235, "compression/movement_sparsity/linear_layer_sparsity": 0.24691848505453445, "compression/movement_sparsity/model_sparsity": 0.23843607548280246, "compression_loss": 66.54849243164062, "distillation_loss": 0.5729374885559082, "epoch": 1.16, "learning_rate": 4.418427726120034e-05, "loss": 67.3083, "step": 1376, "task_loss": 1.3767502307891846 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6204569214125051, "compression/movement_sparsity/importance_threshold": -0.002761791211037994, "compression/movement_sparsity/linear_layer_sparsity": 0.2489480976278347, "compression/movement_sparsity/model_sparsity": 0.24039596462039126, "compression_loss": 66.83454895019531, "distillation_loss": 0.9451680183410645, "epoch": 1.16, "learning_rate": 4.418005071851226e-05, "loss": 67.8146, "step": 1377, "task_loss": 1.2235774993896484 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6231168281115813, "compression/movement_sparsity/importance_threshold": -0.002753439025542444, "compression/movement_sparsity/linear_layer_sparsity": 0.2511811245768351, "compression/movement_sparsity/model_sparsity": 0.2425522802240991, "compression_loss": 67.12010955810547, "distillation_loss": 1.1316816806793213, "epoch": 1.16, "learning_rate": 4.4175824175824176e-05, "loss": 68.3442, "step": 1378, "task_loss": 0.37538233399391174 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6257713666859449, "compression/movement_sparsity/importance_threshold": -0.002745103696116838, "compression/movement_sparsity/linear_layer_sparsity": 0.2534306188013407, "compression/movement_sparsity/model_sparsity": 0.24472449740173924, "compression_loss": 67.40505981445312, "distillation_loss": 0.8693324327468872, "epoch": 1.17, "learning_rate": 4.4171597633136095e-05, "loss": 68.1973, "step": 1379, "task_loss": 0.3585340678691864 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6284205425579432, "compression/movement_sparsity/importance_threshold": -0.0027367852057348444, "compression/movement_sparsity/linear_layer_sparsity": 0.2556787178982329, "compression/movement_sparsity/model_sparsity": 0.2468953673786914, "compression_loss": 67.68950653076172, "distillation_loss": 0.7742630839347839, "epoch": 1.17, "learning_rate": 4.4167371090448015e-05, "loss": 68.546, "step": 1380, "task_loss": 0.22085602581501007 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6310643611499249, "compression/movement_sparsity/importance_threshold": -0.0027284835373701296, "compression/movement_sparsity/linear_layer_sparsity": 0.2576685752966351, "compression/movement_sparsity/model_sparsity": 0.24881686705394118, "compression_loss": 67.97335815429688, "distillation_loss": 1.2497050762176514, "epoch": 1.17, "learning_rate": 4.4163144547759935e-05, "loss": 69.1566, "step": 1381, "task_loss": 1.7637649774551392 }, { "compression/movement_sparsity/importance_regularization_factor": 0.633702827884238, "compression/movement_sparsity/importance_threshold": -0.0027201986739963597, "compression/movement_sparsity/linear_layer_sparsity": 0.2598670937044974, "compression/movement_sparsity/model_sparsity": 0.25093985959105963, "compression_loss": 68.25659942626953, "distillation_loss": 1.1690393686294556, "epoch": 1.17, "learning_rate": 4.4158918005071855e-05, "loss": 69.3858, "step": 1382, "task_loss": 0.432060569524765 }, { "compression/movement_sparsity/importance_regularization_factor": 0.636335948183231, "compression/movement_sparsity/importance_threshold": -0.0027119305985872015, "compression/movement_sparsity/linear_layer_sparsity": 0.2618242073385716, "compression/movement_sparsity/model_sparsity": 0.25282974035101763, "compression_loss": 68.53935241699219, "distillation_loss": 1.0316088199615479, "epoch": 1.17, "learning_rate": 4.4154691462383774e-05, "loss": 69.5442, "step": 1383, "task_loss": 1.9555219411849976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6389637274692515, "compression/movement_sparsity/importance_threshold": -0.002703679294116324, "compression/movement_sparsity/linear_layer_sparsity": 0.2638772866737792, "compression/movement_sparsity/model_sparsity": 0.2548122900950501, "compression_loss": 68.82148742675781, "distillation_loss": 1.121019959449768, "epoch": 1.17, "learning_rate": 4.415046491969569e-05, "loss": 69.6437, "step": 1384, "task_loss": 1.2901555299758911 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6415861711646482, "compression/movement_sparsity/importance_threshold": -0.0026954447435573897, "compression/movement_sparsity/linear_layer_sparsity": 0.2661049477473435, "compression/movement_sparsity/model_sparsity": 0.2569634241576504, "compression_loss": 69.10302734375, "distillation_loss": 0.8796181678771973, "epoch": 1.17, "learning_rate": 4.414623837700761e-05, "loss": 70.1302, "step": 1385, "task_loss": 1.7014614343643188 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6442032846917692, "compression/movement_sparsity/importance_threshold": -0.0026872269298840686, "compression/movement_sparsity/linear_layer_sparsity": 0.26821227012112664, "compression/movement_sparsity/model_sparsity": 0.2589983535250122, "compression_loss": 69.3840103149414, "distillation_loss": 0.8436229228973389, "epoch": 1.17, "learning_rate": 4.414201183431953e-05, "loss": 70.2836, "step": 1386, "task_loss": 0.7809646725654602 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6468150734729627, "compression/movement_sparsity/importance_threshold": -0.0026790258360700264, "compression/movement_sparsity/linear_layer_sparsity": 0.2704441404258664, "compression/movement_sparsity/model_sparsity": 0.261153552218748, "compression_loss": 69.66437530517578, "distillation_loss": 0.48853152990341187, "epoch": 1.17, "learning_rate": 4.4137785291631447e-05, "loss": 70.4585, "step": 1387, "task_loss": 0.12014459818601608 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6494215429305765, "compression/movement_sparsity/importance_threshold": -0.0026708414450889303, "compression/movement_sparsity/linear_layer_sparsity": 0.2725881534634651, "compression/movement_sparsity/model_sparsity": 0.2632239118127496, "compression_loss": 69.94415283203125, "distillation_loss": 0.686325192451477, "epoch": 1.17, "learning_rate": 4.4133558748943366e-05, "loss": 70.7651, "step": 1388, "task_loss": 0.9187728762626648 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6520226984869595, "compression/movement_sparsity/importance_threshold": -0.0026626737399144456, "compression/movement_sparsity/linear_layer_sparsity": 0.2747935759643885, "compression/movement_sparsity/model_sparsity": 0.2653535712660931, "compression_loss": 70.22332000732422, "distillation_loss": 0.7436723709106445, "epoch": 1.17, "learning_rate": 4.4129332206255286e-05, "loss": 71.1187, "step": 1389, "task_loss": 0.863633930683136 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6546185455644595, "compression/movement_sparsity/importance_threshold": -0.00265452270352024, "compression/movement_sparsity/linear_layer_sparsity": 0.2772152909420588, "compression/movement_sparsity/model_sparsity": 0.26769209288421375, "compression_loss": 70.50190734863281, "distillation_loss": 0.8926471471786499, "epoch": 1.17, "learning_rate": 4.4125105663567206e-05, "loss": 71.53, "step": 1390, "task_loss": 0.5865926146507263 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6572090895854248, "compression/movement_sparsity/importance_threshold": -0.0026463883188799805, "compression/movement_sparsity/linear_layer_sparsity": 0.2794186147894783, "compression/movement_sparsity/model_sparsity": 0.26981972577925745, "compression_loss": 70.77996063232422, "distillation_loss": 0.6902129650115967, "epoch": 1.18, "learning_rate": 4.412087912087912e-05, "loss": 71.827, "step": 1391, "task_loss": 0.9052955508232117 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6597943359722029, "compression/movement_sparsity/importance_threshold": -0.002638270568967335, "compression/movement_sparsity/linear_layer_sparsity": 0.2815503340102445, "compression/movement_sparsity/model_sparsity": 0.2718782138868549, "compression_loss": 71.05741119384766, "distillation_loss": 1.233741283416748, "epoch": 1.18, "learning_rate": 4.4116652578191045e-05, "loss": 72.0276, "step": 1392, "task_loss": 1.1508111953735352 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6623742901471432, "compression/movement_sparsity/importance_threshold": -0.0026301694367559672, "compression/movement_sparsity/linear_layer_sparsity": 0.2837227146426489, "compression/movement_sparsity/model_sparsity": 0.2739759665615117, "compression_loss": 71.33426666259766, "distillation_loss": 0.8052124977111816, "epoch": 1.18, "learning_rate": 4.4112426035502965e-05, "loss": 72.2643, "step": 1393, "task_loss": 0.5793078541755676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6649489575325933, "compression/movement_sparsity/importance_threshold": -0.002622084905219545, "compression/movement_sparsity/linear_layer_sparsity": 0.2860477723174629, "compression/movement_sparsity/model_sparsity": 0.2762211513524818, "compression_loss": 71.61056518554688, "distillation_loss": 1.206644058227539, "epoch": 1.18, "learning_rate": 4.410819949281488e-05, "loss": 72.6324, "step": 1394, "task_loss": 1.0528587102890015 }, { "compression/movement_sparsity/importance_regularization_factor": 0.667518343550901, "compression/movement_sparsity/importance_threshold": -0.002614016957331737, "compression/movement_sparsity/linear_layer_sparsity": 0.28834676376182494, "compression/movement_sparsity/model_sparsity": 0.27844116536820507, "compression_loss": 71.8862533569336, "distillation_loss": 1.1972408294677734, "epoch": 1.18, "learning_rate": 4.41039729501268e-05, "loss": 72.8677, "step": 1395, "task_loss": 0.870022177696228 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6700824536244152, "compression/movement_sparsity/importance_threshold": -0.0026059655760662078, "compression/movement_sparsity/linear_layer_sparsity": 0.29041409247735744, "compression/movement_sparsity/model_sparsity": 0.28043747498251204, "compression_loss": 72.16140747070312, "distillation_loss": 2.1123623847961426, "epoch": 1.18, "learning_rate": 4.409974640743872e-05, "loss": 73.4825, "step": 1396, "task_loss": 1.6239508390426636 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6726412931754838, "compression/movement_sparsity/importance_threshold": -0.0025979307443966242, "compression/movement_sparsity/linear_layer_sparsity": 0.2927393647871889, "compression/movement_sparsity/model_sparsity": 0.28268286703512646, "compression_loss": 72.43602752685547, "distillation_loss": 0.9762169122695923, "epoch": 1.18, "learning_rate": 4.409551986475064e-05, "loss": 73.3545, "step": 1397, "task_loss": 1.3902076482772827 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6751948676264545, "compression/movement_sparsity/importance_threshold": -0.0025899124452966552, "compression/movement_sparsity/linear_layer_sparsity": 0.29480649079187154, "compression/movement_sparsity/model_sparsity": 0.28467898090232485, "compression_loss": 72.71007537841797, "distillation_loss": 0.7956358194351196, "epoch": 1.18, "learning_rate": 4.409129332206256e-05, "loss": 73.6023, "step": 1398, "task_loss": 0.37409406900405884 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6777431823996767, "compression/movement_sparsity/importance_threshold": -0.002581910661739964, "compression/movement_sparsity/linear_layer_sparsity": 0.2970966583521831, "compression/movement_sparsity/model_sparsity": 0.28689047416156016, "compression_loss": 72.98355865478516, "distillation_loss": 1.7923378944396973, "epoch": 1.18, "learning_rate": 4.408706677937448e-05, "loss": 74.4578, "step": 1399, "task_loss": 1.2904648780822754 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6802862429174975, "compression/movement_sparsity/importance_threshold": -0.0025739253767002195, "compression/movement_sparsity/linear_layer_sparsity": 0.2993565623750347, "compression/movement_sparsity/model_sparsity": 0.2890727435289489, "compression_loss": 73.2564697265625, "distillation_loss": 1.7499198913574219, "epoch": 1.18, "learning_rate": 4.408284023668639e-05, "loss": 74.432, "step": 1400, "task_loss": 1.7201018333435059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6828240546022653, "compression/movement_sparsity/importance_threshold": -0.002565956573151089, "compression/movement_sparsity/linear_layer_sparsity": 0.30159656496210213, "compression/movement_sparsity/model_sparsity": 0.2912357951360966, "compression_loss": 73.52880859375, "distillation_loss": 0.9857583045959473, "epoch": 1.18, "learning_rate": 4.407861369399831e-05, "loss": 74.4157, "step": 1401, "task_loss": 0.9768823385238647 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6853566228763288, "compression/movement_sparsity/importance_threshold": -0.0025580042340662373, "compression/movement_sparsity/linear_layer_sparsity": 0.3037310148172569, "compression/movement_sparsity/model_sparsity": 0.29329692007239094, "compression_loss": 73.80050659179688, "distillation_loss": 1.4733920097351074, "epoch": 1.19, "learning_rate": 4.407438715131023e-05, "loss": 74.9868, "step": 1402, "task_loss": 0.8372330069541931 }, { "compression/movement_sparsity/importance_regularization_factor": 0.687883953162036, "compression/movement_sparsity/importance_threshold": -0.0025500683424193316, "compression/movement_sparsity/linear_layer_sparsity": 0.30602875422401715, "compression/movement_sparsity/model_sparsity": 0.2955157250618558, "compression_loss": 74.07170104980469, "distillation_loss": 0.8430300354957581, "epoch": 1.19, "learning_rate": 4.407016060862215e-05, "loss": 75.0115, "step": 1403, "task_loss": 1.564890742301941 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6904060508817345, "compression/movement_sparsity/importance_threshold": -0.002542148881184041, "compression/movement_sparsity/linear_layer_sparsity": 0.30824122390801334, "compression/movement_sparsity/model_sparsity": 0.2976521896058539, "compression_loss": 74.3423080444336, "distillation_loss": 1.175536036491394, "epoch": 1.19, "learning_rate": 4.406593406593407e-05, "loss": 75.3343, "step": 1404, "task_loss": 0.9068008661270142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6929229214577731, "compression/movement_sparsity/importance_threshold": -0.0025342458333340286, "compression/movement_sparsity/linear_layer_sparsity": 0.31055848317719353, "compression/movement_sparsity/model_sparsity": 0.29988984389041434, "compression_loss": 74.61233520507812, "distillation_loss": 1.0687832832336426, "epoch": 1.19, "learning_rate": 4.406170752324599e-05, "loss": 75.6561, "step": 1405, "task_loss": 0.6772088408470154 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6954345703125, "compression/movement_sparsity/importance_threshold": -0.002526359181842963, "compression/movement_sparsity/linear_layer_sparsity": 0.31300005189397756, "compression/movement_sparsity/model_sparsity": 0.30224753721063297, "compression_loss": 74.88172149658203, "distillation_loss": 0.8092421293258667, "epoch": 1.19, "learning_rate": 4.405748098055791e-05, "loss": 76.1355, "step": 1406, "task_loss": 1.1415399312973022 }, { "compression/movement_sparsity/importance_regularization_factor": 0.6979410028682633, "compression/movement_sparsity/importance_threshold": -0.002518488909684511, "compression/movement_sparsity/linear_layer_sparsity": 0.3151791935294315, "compression/movement_sparsity/model_sparsity": 0.30435181862708527, "compression_loss": 75.15054321289062, "distillation_loss": 0.9002688527107239, "epoch": 1.19, "learning_rate": 4.405325443786982e-05, "loss": 76.1927, "step": 1407, "task_loss": 0.997353196144104 }, { "compression/movement_sparsity/importance_regularization_factor": 0.700442224547411, "compression/movement_sparsity/importance_threshold": -0.002510634999832339, "compression/movement_sparsity/linear_layer_sparsity": 0.31749029992811156, "compression/movement_sparsity/model_sparsity": 0.3065835314111758, "compression_loss": 75.4188232421875, "distillation_loss": 0.8566809296607971, "epoch": 1.19, "learning_rate": 4.404902789518174e-05, "loss": 76.8891, "step": 1408, "task_loss": 1.3952616453170776 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7029382407722911, "compression/movement_sparsity/importance_threshold": -0.002502797435260115, "compression/movement_sparsity/linear_layer_sparsity": 0.31967678685082923, "compression/movement_sparsity/model_sparsity": 0.3086949057816776, "compression_loss": 75.6865463256836, "distillation_loss": 1.2107224464416504, "epoch": 1.19, "learning_rate": 4.404480135249367e-05, "loss": 76.9155, "step": 1409, "task_loss": 1.0626639127731323 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7054290569652526, "compression/movement_sparsity/importance_threshold": -0.0024949761989415033, "compression/movement_sparsity/linear_layer_sparsity": 0.3218799318357342, "compression/movement_sparsity/model_sparsity": 0.3108223659586843, "compression_loss": 75.95377349853516, "distillation_loss": 1.1467063426971436, "epoch": 1.19, "learning_rate": 4.404057480980558e-05, "loss": 77.4632, "step": 1410, "task_loss": 1.2172901630401611 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7079146785486432, "compression/movement_sparsity/importance_threshold": -0.002487171273850172, "compression/movement_sparsity/linear_layer_sparsity": 0.3240487113695126, "compression/movement_sparsity/model_sparsity": 0.3129166412435312, "compression_loss": 76.22038269042969, "distillation_loss": 1.2668073177337646, "epoch": 1.19, "learning_rate": 4.40363482671175e-05, "loss": 77.3292, "step": 1411, "task_loss": 1.6572024822235107 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7103951109448107, "compression/movement_sparsity/importance_threshold": -0.0024793826429597887, "compression/movement_sparsity/linear_layer_sparsity": 0.32622865192419814, "compression/movement_sparsity/model_sparsity": 0.31502169413388176, "compression_loss": 76.48645782470703, "distillation_loss": 1.4000005722045898, "epoch": 1.19, "learning_rate": 4.403212172442942e-05, "loss": 78.1424, "step": 1412, "task_loss": 1.791578769683838 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7128703595761041, "compression/movement_sparsity/importance_threshold": -0.002471610289244018, "compression/movement_sparsity/linear_layer_sparsity": 0.3284364115619782, "compression/movement_sparsity/model_sparsity": 0.31715361043624096, "compression_loss": 76.75199127197266, "distillation_loss": 0.8285961151123047, "epoch": 1.19, "learning_rate": 4.402789518174133e-05, "loss": 77.8442, "step": 1413, "task_loss": 1.348632574081421 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7153404298648711, "compression/movement_sparsity/importance_threshold": -0.002463854195676528, "compression/movement_sparsity/linear_layer_sparsity": 0.33053977511210625, "compression/movement_sparsity/model_sparsity": 0.319184716977719, "compression_loss": 77.01703643798828, "distillation_loss": 1.1366537809371948, "epoch": 1.2, "learning_rate": 4.402366863905326e-05, "loss": 77.9873, "step": 1414, "task_loss": 1.0489082336425781 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7178053272334597, "compression/movement_sparsity/importance_threshold": -0.002456114345230986, "compression/movement_sparsity/linear_layer_sparsity": 0.3328285356206367, "compression/movement_sparsity/model_sparsity": 0.32139485152173053, "compression_loss": 77.28144836425781, "distillation_loss": 1.08726167678833, "epoch": 1.2, "learning_rate": 4.401944209636518e-05, "loss": 78.482, "step": 1415, "task_loss": 0.9780789017677307 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7202650571042188, "compression/movement_sparsity/importance_threshold": -0.0024483907208810566, "compression/movement_sparsity/linear_layer_sparsity": 0.3350009281772088, "compression/movement_sparsity/model_sparsity": 0.32349261571092314, "compression_loss": 77.5453872680664, "distillation_loss": 1.2207279205322266, "epoch": 1.2, "learning_rate": 4.401521555367709e-05, "loss": 78.9881, "step": 1416, "task_loss": 0.6573187112808228 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7227196248994963, "compression/movement_sparsity/importance_threshold": -0.0024406833056004077, "compression/movement_sparsity/linear_layer_sparsity": 0.3372261209480724, "compression/movement_sparsity/model_sparsity": 0.325641366264614, "compression_loss": 77.80865478515625, "distillation_loss": 1.2851381301879883, "epoch": 1.2, "learning_rate": 4.401098901098901e-05, "loss": 78.8852, "step": 1417, "task_loss": 1.7367502450942993 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7251690360416397, "compression/movement_sparsity/importance_threshold": -0.002432992082362708, "compression/movement_sparsity/linear_layer_sparsity": 0.3393983465662976, "compression/movement_sparsity/model_sparsity": 0.3277389692503055, "compression_loss": 78.07147216796875, "distillation_loss": 0.9510504007339478, "epoch": 1.2, "learning_rate": 4.400676246830093e-05, "loss": 79.328, "step": 1418, "task_loss": 0.9809507727622986 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7276132959529984, "compression/movement_sparsity/importance_threshold": -0.0024253170341416203, "compression/movement_sparsity/linear_layer_sparsity": 0.3416635568437472, "compression/movement_sparsity/model_sparsity": 0.32992636258612285, "compression_loss": 78.33370208740234, "distillation_loss": 1.3271602392196655, "epoch": 1.2, "learning_rate": 4.400253592561285e-05, "loss": 79.8687, "step": 1419, "task_loss": 1.0307344198226929 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7300524100559197, "compression/movement_sparsity/importance_threshold": -0.0024176581439108143, "compression/movement_sparsity/linear_layer_sparsity": 0.3439886979877347, "compression/movement_sparsity/model_sparsity": 0.33217162797884353, "compression_loss": 78.59539794921875, "distillation_loss": 1.4630053043365479, "epoch": 1.2, "learning_rate": 4.399830938292477e-05, "loss": 80.0691, "step": 1420, "task_loss": 1.008732795715332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7324863837727519, "compression/movement_sparsity/importance_threshold": -0.002410015394643956, "compression/movement_sparsity/linear_layer_sparsity": 0.3464207154462424, "compression/movement_sparsity/model_sparsity": 0.3345200981558907, "compression_loss": 78.85662841796875, "distillation_loss": 0.7131186127662659, "epoch": 1.2, "learning_rate": 4.399408284023669e-05, "loss": 80.2534, "step": 1421, "task_loss": 0.5300425291061401 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7349152225258437, "compression/movement_sparsity/importance_threshold": -0.0024023887693147114, "compression/movement_sparsity/linear_layer_sparsity": 0.348494328198119, "compression/movement_sparsity/model_sparsity": 0.33652247593056134, "compression_loss": 79.11727142333984, "distillation_loss": 1.4803380966186523, "epoch": 1.2, "learning_rate": 4.398985629754861e-05, "loss": 80.7196, "step": 1422, "task_loss": 1.6573938131332397 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7373389317375428, "compression/movement_sparsity/importance_threshold": -0.0023947782508967476, "compression/movement_sparsity/linear_layer_sparsity": 0.3507736566900495, "compression/movement_sparsity/model_sparsity": 0.33872350247675936, "compression_loss": 79.3774185180664, "distillation_loss": 0.7501176595687866, "epoch": 1.2, "learning_rate": 4.398562975486052e-05, "loss": 80.4505, "step": 1423, "task_loss": 0.9124040007591248 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7397575168301977, "compression/movement_sparsity/importance_threshold": -0.0023871838223637316, "compression/movement_sparsity/linear_layer_sparsity": 0.35284684017189116, "compression/movement_sparsity/model_sparsity": 0.34072546572814144, "compression_loss": 79.6369400024414, "distillation_loss": 1.8759551048278809, "epoch": 1.2, "learning_rate": 4.398140321217244e-05, "loss": 81.0921, "step": 1424, "task_loss": 1.4439818859100342 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7421709832261562, "compression/movement_sparsity/importance_threshold": -0.0023796054666893308, "compression/movement_sparsity/linear_layer_sparsity": 0.354913763465724, "compression/movement_sparsity/model_sparsity": 0.3427213838482313, "compression_loss": 79.89598846435547, "distillation_loss": 1.3793349266052246, "epoch": 1.2, "learning_rate": 4.397717666948436e-05, "loss": 81.4144, "step": 1425, "task_loss": 1.0169471502304077 }, { "compression/movement_sparsity/importance_regularization_factor": 0.744579336347767, "compression/movement_sparsity/importance_threshold": -0.00237204316684721, "compression/movement_sparsity/linear_layer_sparsity": 0.3569735322589754, "compression/movement_sparsity/model_sparsity": 0.3447103932468445, "compression_loss": 80.1545181274414, "distillation_loss": 1.1719056367874146, "epoch": 1.21, "learning_rate": 4.397295012679628e-05, "loss": 81.2686, "step": 1426, "task_loss": 0.5395320653915405 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7469825816173778, "compression/movement_sparsity/importance_threshold": -0.0023644969058110375, "compression/movement_sparsity/linear_layer_sparsity": 0.3590391558185359, "compression/movement_sparsity/model_sparsity": 0.3467050562825328, "compression_loss": 80.41238403320312, "distillation_loss": 1.6068623065948486, "epoch": 1.21, "learning_rate": 4.39687235841082e-05, "loss": 81.7638, "step": 1427, "task_loss": 1.4671213626861572 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7493807244573374, "compression/movement_sparsity/importance_threshold": -0.002356966666554479, "compression/movement_sparsity/linear_layer_sparsity": 0.361016707475938, "compression/movement_sparsity/model_sparsity": 0.3486146729568426, "compression_loss": 80.669677734375, "distillation_loss": 2.0336830615997314, "epoch": 1.21, "learning_rate": 4.396449704142012e-05, "loss": 82.1165, "step": 1428, "task_loss": 1.1140927076339722 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7517737702899931, "compression/movement_sparsity/importance_threshold": -0.0023494524320512027, "compression/movement_sparsity/linear_layer_sparsity": 0.3631738609462714, "compression/movement_sparsity/model_sparsity": 0.35069772156928986, "compression_loss": 80.92645263671875, "distillation_loss": 1.7074615955352783, "epoch": 1.21, "learning_rate": 4.3960270498732035e-05, "loss": 82.1109, "step": 1429, "task_loss": 0.8876955509185791 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7541617245376939, "compression/movement_sparsity/importance_threshold": -0.0023419541852748743, "compression/movement_sparsity/linear_layer_sparsity": 0.36523402323705473, "compression/movement_sparsity/model_sparsity": 0.35268711094758426, "compression_loss": 81.18262481689453, "distillation_loss": 1.5673458576202393, "epoch": 1.21, "learning_rate": 4.3956043956043955e-05, "loss": 82.2949, "step": 1430, "task_loss": 0.6854345798492432 }, { "compression/movement_sparsity/importance_regularization_factor": 0.756544592622788, "compression/movement_sparsity/importance_threshold": -0.0023344719091991594, "compression/movement_sparsity/linear_layer_sparsity": 0.36724220808111335, "compression/movement_sparsity/model_sparsity": 0.3546263084643502, "compression_loss": 81.43828582763672, "distillation_loss": 0.9449591636657715, "epoch": 1.21, "learning_rate": 4.395181741335588e-05, "loss": 82.4104, "step": 1431, "task_loss": 0.7326794862747192 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7589223799676228, "compression/movement_sparsity/importance_threshold": -0.0023270055867977276, "compression/movement_sparsity/linear_layer_sparsity": 0.3694161984761486, "compression/movement_sparsity/model_sparsity": 0.3567256156013393, "compression_loss": 81.69331359863281, "distillation_loss": 1.7695385217666626, "epoch": 1.21, "learning_rate": 4.39475908706678e-05, "loss": 83.2207, "step": 1432, "task_loss": 1.5717283487319946 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7612950919945474, "compression/movement_sparsity/importance_threshold": -0.002319555201044243, "compression/movement_sparsity/linear_layer_sparsity": 0.3716317565195625, "compression/movement_sparsity/model_sparsity": 0.3588650624101082, "compression_loss": 81.94783020019531, "distillation_loss": 1.4127122163772583, "epoch": 1.21, "learning_rate": 4.3943364327979714e-05, "loss": 83.2395, "step": 1433, "task_loss": 1.1738373041152954 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7636627341259096, "compression/movement_sparsity/importance_threshold": -0.0023121207349123727, "compression/movement_sparsity/linear_layer_sparsity": 0.37405204059711655, "compression/movement_sparsity/model_sparsity": 0.3612022022839335, "compression_loss": 82.20179748535156, "distillation_loss": 1.1583659648895264, "epoch": 1.21, "learning_rate": 4.3939137785291634e-05, "loss": 83.6197, "step": 1434, "task_loss": 0.5853502154350281 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7660253117840572, "compression/movement_sparsity/importance_threshold": -0.002304702171375786, "compression/movement_sparsity/linear_layer_sparsity": 0.37599286581820013, "compression/movement_sparsity/model_sparsity": 0.36307635418799616, "compression_loss": 82.4552001953125, "distillation_loss": 0.9428606629371643, "epoch": 1.21, "learning_rate": 4.393491124260355e-05, "loss": 83.542, "step": 1435, "task_loss": 0.3349272906780243 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7683828303913391, "compression/movement_sparsity/importance_threshold": -0.0022972994934081455, "compression/movement_sparsity/linear_layer_sparsity": 0.37809954428693093, "compression/movement_sparsity/model_sparsity": 0.36511066177042506, "compression_loss": 82.7081527709961, "distillation_loss": 1.9129095077514648, "epoch": 1.21, "learning_rate": 4.393068469991547e-05, "loss": 84.0512, "step": 1436, "task_loss": 0.9742015600204468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7707352953701033, "compression/movement_sparsity/importance_threshold": -0.002289912683983121, "compression/movement_sparsity/linear_layer_sparsity": 0.38036760444044554, "compression/movement_sparsity/model_sparsity": 0.3673008070802973, "compression_loss": 82.96049499511719, "distillation_loss": 1.9891302585601807, "epoch": 1.21, "learning_rate": 4.392645815722739e-05, "loss": 84.2558, "step": 1437, "task_loss": 1.8122395277023315 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7730827121426974, "compression/movement_sparsity/importance_threshold": -0.002282541726074379, "compression/movement_sparsity/linear_layer_sparsity": 0.382474032501656, "compression/movement_sparsity/model_sparsity": 0.3693348728574745, "compression_loss": 83.2123031616211, "distillation_loss": 2.6006221771240234, "epoch": 1.22, "learning_rate": 4.392223161453931e-05, "loss": 85.2433, "step": 1438, "task_loss": 2.2420737743377686 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7754250861314707, "compression/movement_sparsity/importance_threshold": -0.002275186602655584, "compression/movement_sparsity/linear_layer_sparsity": 0.38450653072352414, "compression/movement_sparsity/model_sparsity": 0.37129754851272556, "compression_loss": 83.46353912353516, "distillation_loss": 1.7914044857025146, "epoch": 1.22, "learning_rate": 4.3918005071851226e-05, "loss": 85.5608, "step": 1439, "task_loss": 1.5856698751449585 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7777624227587706, "compression/movement_sparsity/importance_threshold": -0.0022678472967004046, "compression/movement_sparsity/linear_layer_sparsity": 0.3863972625163695, "compression/movement_sparsity/model_sparsity": 0.3731233278519153, "compression_loss": 83.71437072753906, "distillation_loss": 2.3265724182128906, "epoch": 1.22, "learning_rate": 4.3913778529163145e-05, "loss": 85.3337, "step": 1440, "task_loss": 1.059078335762024 }, { "compression/movement_sparsity/importance_regularization_factor": 0.780094727446945, "compression/movement_sparsity/importance_threshold": -0.0022605237911825084, "compression/movement_sparsity/linear_layer_sparsity": 0.38825155405291967, "compression/movement_sparsity/model_sparsity": 0.37491391876971697, "compression_loss": 83.96455383300781, "distillation_loss": 1.8383489847183228, "epoch": 1.22, "learning_rate": 4.3909551986475065e-05, "loss": 85.5167, "step": 1441, "task_loss": 0.8957647085189819 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7824220056183431, "compression/movement_sparsity/importance_threshold": -0.0022532160690755592, "compression/movement_sparsity/linear_layer_sparsity": 0.3900560741137577, "compression/movement_sparsity/model_sparsity": 0.3766564480151122, "compression_loss": 84.21428680419922, "distillation_loss": 1.6380958557128906, "epoch": 1.22, "learning_rate": 4.3905325443786985e-05, "loss": 85.6299, "step": 1442, "task_loss": 1.5284394025802612 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7847442626953125, "compression/movement_sparsity/importance_threshold": -0.002245924113353226, "compression/movement_sparsity/linear_layer_sparsity": 0.3922005521938942, "compression/movement_sparsity/model_sparsity": 0.3787272566760098, "compression_loss": 84.46338653564453, "distillation_loss": 0.9237058758735657, "epoch": 1.22, "learning_rate": 4.3901098901098904e-05, "loss": 86.0037, "step": 1443, "task_loss": 0.7917584180831909 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7870615041002014, "compression/movement_sparsity/importance_threshold": -0.002238647906989175, "compression/movement_sparsity/linear_layer_sparsity": 0.3941746577668495, "compression/movement_sparsity/model_sparsity": 0.38063354564947505, "compression_loss": 84.71204376220703, "distillation_loss": 1.927260398864746, "epoch": 1.22, "learning_rate": 4.3896872358410824e-05, "loss": 86.077, "step": 1444, "task_loss": 1.1530635356903076 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7893737352553581, "compression/movement_sparsity/importance_threshold": -0.002231387432957072, "compression/movement_sparsity/linear_layer_sparsity": 0.3961089962407392, "compression/movement_sparsity/model_sparsity": 0.3825014336460654, "compression_loss": 84.96006774902344, "distillation_loss": 2.230501174926758, "epoch": 1.22, "learning_rate": 4.389264581572274e-05, "loss": 86.6381, "step": 1445, "task_loss": 1.447485327720642 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7916809615831306, "compression/movement_sparsity/importance_threshold": -0.002224142674230586, "compression/movement_sparsity/linear_layer_sparsity": 0.39809811434074804, "compression/movement_sparsity/model_sparsity": 0.38442221942009597, "compression_loss": 85.20756530761719, "distillation_loss": 1.4549471139907837, "epoch": 1.22, "learning_rate": 4.388841927303466e-05, "loss": 86.703, "step": 1446, "task_loss": 1.2901946306228638 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7939831885058674, "compression/movement_sparsity/importance_threshold": -0.0022169136137833814, "compression/movement_sparsity/linear_layer_sparsity": 0.3998693778980497, "compression/movement_sparsity/model_sparsity": 0.38613263462516023, "compression_loss": 85.45455169677734, "distillation_loss": 1.4603662490844727, "epoch": 1.22, "learning_rate": 4.388419273034658e-05, "loss": 87.1138, "step": 1447, "task_loss": 1.4821934700012207 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7962804214459166, "compression/movement_sparsity/importance_threshold": -0.0022097002345891252, "compression/movement_sparsity/linear_layer_sparsity": 0.40187120716075836, "compression/movement_sparsity/model_sparsity": 0.3880656948943477, "compression_loss": 85.70106506347656, "distillation_loss": 1.9548306465148926, "epoch": 1.22, "learning_rate": 4.38799661876585e-05, "loss": 87.0557, "step": 1448, "task_loss": 1.3373868465423584 }, { "compression/movement_sparsity/importance_regularization_factor": 0.7985726658256258, "compression/movement_sparsity/importance_threshold": -0.0022025025196214863, "compression/movement_sparsity/linear_layer_sparsity": 0.40381174620181876, "compression/movement_sparsity/model_sparsity": 0.3899395704495513, "compression_loss": 85.94697570800781, "distillation_loss": 1.0626968145370483, "epoch": 1.22, "learning_rate": 4.3875739644970416e-05, "loss": 87.6983, "step": 1449, "task_loss": 0.2939516305923462 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8008599270673442, "compression/movement_sparsity/importance_threshold": -0.002195320451854129, "compression/movement_sparsity/linear_layer_sparsity": 0.4057403133785727, "compression/movement_sparsity/model_sparsity": 0.3918018854108172, "compression_loss": 86.19231414794922, "distillation_loss": 2.3794965744018555, "epoch": 1.23, "learning_rate": 4.3871513102282336e-05, "loss": 88.0952, "step": 1450, "task_loss": 1.582653284072876 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8031422105934196, "compression/movement_sparsity/importance_threshold": -0.0021881540142607205, "compression/movement_sparsity/linear_layer_sparsity": 0.40775039416528536, "compression/movement_sparsity/model_sparsity": 0.3937429137387745, "compression_loss": 86.43717193603516, "distillation_loss": 1.5740033388137817, "epoch": 1.23, "learning_rate": 4.3867286559594256e-05, "loss": 88.1893, "step": 1451, "task_loss": 1.5919039249420166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8054195218261998, "compression/movement_sparsity/importance_threshold": -0.0021810031898149285, "compression/movement_sparsity/linear_layer_sparsity": 0.40968184699060717, "compression/movement_sparsity/model_sparsity": 0.3956080152177026, "compression_loss": 86.68151092529297, "distillation_loss": 1.3980646133422852, "epoch": 1.23, "learning_rate": 4.386306001690617e-05, "loss": 88.3813, "step": 1452, "task_loss": 2.2090260982513428 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8076918661880335, "compression/movement_sparsity/importance_threshold": -0.0021738679614904184, "compression/movement_sparsity/linear_layer_sparsity": 0.4115200170525137, "compression/movement_sparsity/model_sparsity": 0.3973830384831101, "compression_loss": 86.92528533935547, "distillation_loss": 1.981194257736206, "epoch": 1.23, "learning_rate": 4.3858833474218095e-05, "loss": 88.822, "step": 1453, "task_loss": 1.2608253955841064 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8099592491012688, "compression/movement_sparsity/importance_threshold": -0.002166748312260858, "compression/movement_sparsity/linear_layer_sparsity": 0.41346941575012747, "compression/movement_sparsity/model_sparsity": 0.39926546933840895, "compression_loss": 87.16854858398438, "distillation_loss": 1.5396332740783691, "epoch": 1.23, "learning_rate": 4.3854606931530015e-05, "loss": 88.7082, "step": 1454, "task_loss": 1.3598216772079468 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8122216759882535, "compression/movement_sparsity/importance_threshold": -0.002159644225099914, "compression/movement_sparsity/linear_layer_sparsity": 0.41532618751354583, "compression/movement_sparsity/model_sparsity": 0.4010584552796559, "compression_loss": 87.41122436523438, "distillation_loss": 3.2592477798461914, "epoch": 1.23, "learning_rate": 4.385038038884193e-05, "loss": 89.797, "step": 1455, "task_loss": 1.6531397104263306 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8144791522713364, "compression/movement_sparsity/importance_threshold": -0.0021525556829812523, "compression/movement_sparsity/linear_layer_sparsity": 0.4172355448562384, "compression/movement_sparsity/model_sparsity": 0.4029022203237567, "compression_loss": 87.65345764160156, "distillation_loss": 1.3059204816818237, "epoch": 1.23, "learning_rate": 4.384615384615385e-05, "loss": 89.4426, "step": 1456, "task_loss": 0.5418781042098999 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8167316833728653, "compression/movement_sparsity/importance_threshold": -0.00214548266887854, "compression/movement_sparsity/linear_layer_sparsity": 0.4190575934435012, "compression/movement_sparsity/model_sparsity": 0.40466167593676994, "compression_loss": 87.89512634277344, "distillation_loss": 1.720576286315918, "epoch": 1.23, "learning_rate": 4.384192730346577e-05, "loss": 89.5843, "step": 1457, "task_loss": 0.6119003295898438 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8189792747151883, "compression/movement_sparsity/importance_threshold": -0.0021384251657654446, "compression/movement_sparsity/linear_layer_sparsity": 0.4210763431000852, "compression/movement_sparsity/model_sparsity": 0.40661107533224994, "compression_loss": 88.13621520996094, "distillation_loss": 2.311744451522827, "epoch": 1.23, "learning_rate": 4.383770076077768e-05, "loss": 89.8228, "step": 1458, "task_loss": 1.4281361103057861 }, { "compression/movement_sparsity/importance_regularization_factor": 0.821221931720654, "compression/movement_sparsity/importance_threshold": -0.002131383156615631, "compression/movement_sparsity/linear_layer_sparsity": 0.4230505202180463, "compression/movement_sparsity/model_sparsity": 0.4085174333929299, "compression_loss": 88.37676239013672, "distillation_loss": 1.977921724319458, "epoch": 1.23, "learning_rate": 4.383347421808961e-05, "loss": 90.2036, "step": 1459, "task_loss": 1.280239462852478 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8234596598116104, "compression/movement_sparsity/importance_threshold": -0.0021243566244027678, "compression/movement_sparsity/linear_layer_sparsity": 0.42487977100256114, "compression/movement_sparsity/model_sparsity": 0.410283843785563, "compression_loss": 88.61685943603516, "distillation_loss": 1.7213938236236572, "epoch": 1.23, "learning_rate": 4.3829247675401526e-05, "loss": 91.0051, "step": 1460, "task_loss": 2.0139009952545166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8256924644104057, "compression/movement_sparsity/importance_threshold": -0.0021173455521005208, "compression/movement_sparsity/linear_layer_sparsity": 0.42689720900070516, "compression/movement_sparsity/model_sparsity": 0.4122319765821056, "compression_loss": 88.8563461303711, "distillation_loss": 1.3316011428833008, "epoch": 1.23, "learning_rate": 4.3825021132713446e-05, "loss": 90.1861, "step": 1461, "task_loss": 1.7979121208190918 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8279203509393878, "compression/movement_sparsity/importance_threshold": -0.002110349922682557, "compression/movement_sparsity/linear_layer_sparsity": 0.42897252690855364, "compression/movement_sparsity/model_sparsity": 0.4142360009353948, "compression_loss": 89.09526062011719, "distillation_loss": 2.021014451980591, "epoch": 1.24, "learning_rate": 4.382079459002536e-05, "loss": 90.791, "step": 1462, "task_loss": 1.4371663331985474 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8301433248209054, "compression/movement_sparsity/importance_threshold": -0.002103369719122543, "compression/movement_sparsity/linear_layer_sparsity": 0.43085194266631255, "compression/movement_sparsity/model_sparsity": 0.41605085298011557, "compression_loss": 89.33366394042969, "distillation_loss": 2.145749807357788, "epoch": 1.24, "learning_rate": 4.381656804733728e-05, "loss": 91.4833, "step": 1463, "task_loss": 1.613084316253662 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8323613914773064, "compression/movement_sparsity/importance_threshold": -0.0020964049243941455, "compression/movement_sparsity/linear_layer_sparsity": 0.43277345073582607, "compression/movement_sparsity/model_sparsity": 0.4179063513361911, "compression_loss": 89.57154083251953, "distillation_loss": 1.9125415086746216, "epoch": 1.24, "learning_rate": 4.38123415046492e-05, "loss": 91.5646, "step": 1464, "task_loss": 1.4451509714126587 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8345745563309392, "compression/movement_sparsity/importance_threshold": -0.002089455521471031, "compression/movement_sparsity/linear_layer_sparsity": 0.43474957149311183, "compression/movement_sparsity/model_sparsity": 0.41981458626620555, "compression_loss": 89.80891418457031, "distillation_loss": 1.868727445602417, "epoch": 1.24, "learning_rate": 4.380811496196112e-05, "loss": 91.9819, "step": 1465, "task_loss": 1.6708552837371826 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8367828248041516, "compression/movement_sparsity/importance_threshold": -0.0020825214933268667, "compression/movement_sparsity/linear_layer_sparsity": 0.4366693267099829, "compression/movement_sparsity/model_sparsity": 0.42166839198551925, "compression_loss": 90.04573822021484, "distillation_loss": 2.5361168384552, "epoch": 1.24, "learning_rate": 4.380388841927304e-05, "loss": 91.8456, "step": 1466, "task_loss": 1.4672216176986694 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8389862023192922, "compression/movement_sparsity/importance_threshold": -0.002075602822935319, "compression/movement_sparsity/linear_layer_sparsity": 0.43848878775286865, "compression/movement_sparsity/model_sparsity": 0.42342534894426503, "compression_loss": 90.28205108642578, "distillation_loss": 0.8657584190368652, "epoch": 1.24, "learning_rate": 4.379966187658496e-05, "loss": 91.6539, "step": 1467, "task_loss": 0.7657065391540527 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8411846942987089, "compression/movement_sparsity/importance_threshold": -0.0020686994932700552, "compression/movement_sparsity/linear_layer_sparsity": 0.44055661728344186, "compression/movement_sparsity/model_sparsity": 0.4254221421690753, "compression_loss": 90.51786804199219, "distillation_loss": 1.1345958709716797, "epoch": 1.24, "learning_rate": 4.379543533389687e-05, "loss": 91.8193, "step": 1468, "task_loss": 1.4328489303588867 }, { "compression/movement_sparsity/importance_regularization_factor": 0.84337830616475, "compression/movement_sparsity/importance_threshold": -0.0020618114873047415, "compression/movement_sparsity/linear_layer_sparsity": 0.44240591259375256, "compression/movement_sparsity/model_sparsity": 0.4272079084963791, "compression_loss": 90.7530517578125, "distillation_loss": 2.471135377883911, "epoch": 1.24, "learning_rate": 4.379120879120879e-05, "loss": 92.5652, "step": 1469, "task_loss": 1.656062126159668 }, { "compression/movement_sparsity/importance_regularization_factor": 0.845567043339764, "compression/movement_sparsity/importance_threshold": -0.0020549387880130442, "compression/movement_sparsity/linear_layer_sparsity": 0.44437739484982797, "compression/movement_sparsity/model_sparsity": 0.42911166427196945, "compression_loss": 90.98782348632812, "distillation_loss": 1.2275235652923584, "epoch": 1.24, "learning_rate": 4.378698224852072e-05, "loss": 92.1805, "step": 1470, "task_loss": 0.8283646106719971 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8477509112460988, "compression/movement_sparsity/importance_threshold": -0.00204808137836863, "compression/movement_sparsity/linear_layer_sparsity": 0.4463222504395725, "compression/movement_sparsity/model_sparsity": 0.4309897080891307, "compression_loss": 91.22193145751953, "distillation_loss": 1.943591833114624, "epoch": 1.24, "learning_rate": 4.378275570583263e-05, "loss": 93.0569, "step": 1471, "task_loss": 1.2804784774780273 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8499299153061023, "compression/movement_sparsity/importance_threshold": -0.0020412392413451676, "compression/movement_sparsity/linear_layer_sparsity": 0.4482376294869212, "compression/movement_sparsity/model_sparsity": 0.4328392879738078, "compression_loss": 91.45556640625, "distillation_loss": 1.5874154567718506, "epoch": 1.24, "learning_rate": 4.377852916314455e-05, "loss": 94.0066, "step": 1472, "task_loss": 0.7793343663215637 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8521040609421231, "compression/movement_sparsity/importance_threshold": -0.0020344123599163213, "compression/movement_sparsity/linear_layer_sparsity": 0.4503339220056412, "compression/movement_sparsity/model_sparsity": 0.4348635663955596, "compression_loss": 91.68872833251953, "distillation_loss": 2.8074960708618164, "epoch": 1.24, "learning_rate": 4.377430262045647e-05, "loss": 93.4532, "step": 1473, "task_loss": 1.2144739627838135 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8542733535765097, "compression/movement_sparsity/importance_threshold": -0.0020276007170557586, "compression/movement_sparsity/linear_layer_sparsity": 0.452360601233703, "compression/movement_sparsity/model_sparsity": 0.4368206229573429, "compression_loss": 91.9212875366211, "distillation_loss": 2.3325397968292236, "epoch": 1.25, "learning_rate": 4.377007607776838e-05, "loss": 94.07, "step": 1474, "task_loss": 1.2730668783187866 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8564377986316093, "compression/movement_sparsity/importance_threshold": -0.0020208042957371476, "compression/movement_sparsity/linear_layer_sparsity": 0.4541298496066742, "compression/movement_sparsity/model_sparsity": 0.43852909220585795, "compression_loss": 92.15338134765625, "distillation_loss": 2.2134971618652344, "epoch": 1.25, "learning_rate": 4.37658495350803e-05, "loss": 93.9278, "step": 1475, "task_loss": 1.9229933023452759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.858597401529771, "compression/movement_sparsity/importance_threshold": -0.002014023078934153, "compression/movement_sparsity/linear_layer_sparsity": 0.4562388890605969, "compression/movement_sparsity/model_sparsity": 0.44056567966637417, "compression_loss": 92.38483428955078, "distillation_loss": 2.442070245742798, "epoch": 1.25, "learning_rate": 4.376162299239223e-05, "loss": 94.9096, "step": 1476, "task_loss": 2.572432518005371 }, { "compression/movement_sparsity/importance_regularization_factor": 0.860752167693343, "compression/movement_sparsity/importance_threshold": -0.002007257049620441, "compression/movement_sparsity/linear_layer_sparsity": 0.4579981807535921, "compression/movement_sparsity/model_sparsity": 0.44226453427750073, "compression_loss": 92.61590576171875, "distillation_loss": 2.8263728618621826, "epoch": 1.25, "learning_rate": 4.375739644970415e-05, "loss": 94.9327, "step": 1477, "task_loss": 2.4674713611602783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8629021025446726, "compression/movement_sparsity/importance_threshold": -0.0020005061907696817, "compression/movement_sparsity/linear_layer_sparsity": 0.45975876025669205, "compression/movement_sparsity/model_sparsity": 0.44396463245849305, "compression_loss": 92.84642028808594, "distillation_loss": 1.3342137336730957, "epoch": 1.25, "learning_rate": 4.375316990701606e-05, "loss": 94.4951, "step": 1478, "task_loss": 0.7381772398948669 }, { "compression/movement_sparsity/importance_regularization_factor": 0.865047211506109, "compression/movement_sparsity/importance_threshold": -0.001993770485355539, "compression/movement_sparsity/linear_layer_sparsity": 0.46180760651238895, "compression/movement_sparsity/model_sparsity": 0.44594309454231845, "compression_loss": 93.07637023925781, "distillation_loss": 1.8354148864746094, "epoch": 1.25, "learning_rate": 4.374894336432798e-05, "loss": 94.6196, "step": 1479, "task_loss": 1.4156618118286133 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8671875, "compression/movement_sparsity/importance_threshold": -0.0019870499163516797, "compression/movement_sparsity/linear_layer_sparsity": 0.4635489165605893, "compression/movement_sparsity/model_sparsity": 0.4476245852334668, "compression_loss": 93.30579376220703, "distillation_loss": 0.9454125761985779, "epoch": 1.25, "learning_rate": 4.37447168216399e-05, "loss": 94.7726, "step": 1480, "task_loss": 0.47018828988075256 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8693229734486936, "compression/movement_sparsity/importance_threshold": -0.001980344466731772, "compression/movement_sparsity/linear_layer_sparsity": 0.46562029949311795, "compression/movement_sparsity/model_sparsity": 0.4496248097899439, "compression_loss": 93.53468322753906, "distillation_loss": 2.756983757019043, "epoch": 1.25, "learning_rate": 4.374049027895182e-05, "loss": 95.5078, "step": 1481, "task_loss": 2.7871975898742676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8714536372745385, "compression/movement_sparsity/importance_threshold": -0.0019736541194694808, "compression/movement_sparsity/linear_layer_sparsity": 0.4675978153780171, "compression/movement_sparsity/model_sparsity": 0.4515343919206463, "compression_loss": 93.76298522949219, "distillation_loss": 2.8619112968444824, "epoch": 1.25, "learning_rate": 4.373626373626374e-05, "loss": 95.9601, "step": 1482, "task_loss": 2.1617841720581055 }, { "compression/movement_sparsity/importance_regularization_factor": 0.873579496899882, "compression/movement_sparsity/importance_threshold": -0.0019669788575384754, "compression/movement_sparsity/linear_layer_sparsity": 0.469544721924595, "compression/movement_sparsity/model_sparsity": 0.4534144162379642, "compression_loss": 93.99088287353516, "distillation_loss": 1.434727430343628, "epoch": 1.25, "learning_rate": 4.373203719357566e-05, "loss": 95.6824, "step": 1483, "task_loss": 1.8588767051696777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8757005577470733, "compression/movement_sparsity/importance_threshold": -0.00196031866391242, "compression/movement_sparsity/linear_layer_sparsity": 0.4713988584469659, "compression/movement_sparsity/model_sparsity": 0.4552048574668005, "compression_loss": 94.21818542480469, "distillation_loss": 1.7201147079467773, "epoch": 1.25, "learning_rate": 4.372781065088757e-05, "loss": 96.1279, "step": 1484, "task_loss": 0.8511843085289001 }, { "compression/movement_sparsity/importance_regularization_factor": 0.87781682523846, "compression/movement_sparsity/importance_threshold": -0.0019536735215649826, "compression/movement_sparsity/linear_layer_sparsity": 0.4732601136974154, "compression/movement_sparsity/model_sparsity": 0.4570021728735062, "compression_loss": 94.44505310058594, "distillation_loss": 1.5092852115631104, "epoch": 1.26, "learning_rate": 4.372358410819949e-05, "loss": 96.9364, "step": 1485, "task_loss": 1.259634256362915 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8799283047963904, "compression/movement_sparsity/importance_threshold": -0.0019470434134698299, "compression/movement_sparsity/linear_layer_sparsity": 0.4750041901525072, "compression/movement_sparsity/model_sparsity": 0.4586863349369589, "compression_loss": 94.6713638305664, "distillation_loss": 1.6528480052947998, "epoch": 1.26, "learning_rate": 4.371935756551141e-05, "loss": 96.3093, "step": 1486, "task_loss": 1.1547406911849976 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8820350018432129, "compression/movement_sparsity/importance_threshold": -0.0019404283226006278, "compression/movement_sparsity/linear_layer_sparsity": 0.4768658031279858, "compression/movement_sparsity/model_sparsity": 0.4604839957797384, "compression_loss": 94.89723205566406, "distillation_loss": 2.292375087738037, "epoch": 1.26, "learning_rate": 4.371513102282333e-05, "loss": 96.9592, "step": 1487, "task_loss": 0.9253711104393005 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8841369218012756, "compression/movement_sparsity/importance_threshold": -0.0019338282319310432, "compression/movement_sparsity/linear_layer_sparsity": 0.47882526582308427, "compression/movement_sparsity/model_sparsity": 0.4623761449032479, "compression_loss": 95.12249755859375, "distillation_loss": 3.1671009063720703, "epoch": 1.26, "learning_rate": 4.371090448013525e-05, "loss": 97.1826, "step": 1488, "task_loss": 1.5423450469970703 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8862340700929262, "compression/movement_sparsity/importance_threshold": -0.0019272431244347446, "compression/movement_sparsity/linear_layer_sparsity": 0.4806814175297856, "compression/movement_sparsity/model_sparsity": 0.4641685320886335, "compression_loss": 95.34727478027344, "distillation_loss": 2.4407575130462646, "epoch": 1.26, "learning_rate": 4.370667793744717e-05, "loss": 97.8708, "step": 1489, "task_loss": 1.1997573375701904 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8883264521405138, "compression/movement_sparsity/importance_threshold": -0.001920672983085396, "compression/movement_sparsity/linear_layer_sparsity": 0.4825638143294535, "compression/movement_sparsity/model_sparsity": 0.4659862627673029, "compression_loss": 95.57157897949219, "distillation_loss": 2.453951358795166, "epoch": 1.26, "learning_rate": 4.370245139475909e-05, "loss": 97.779, "step": 1490, "task_loss": 1.8541613817214966 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8904140733663858, "compression/movement_sparsity/importance_threshold": -0.0019141177908566665, "compression/movement_sparsity/linear_layer_sparsity": 0.4842969920953262, "compression/movement_sparsity/model_sparsity": 0.4676599005450394, "compression_loss": 95.79537200927734, "distillation_loss": 2.0961103439331055, "epoch": 1.26, "learning_rate": 4.3698224852071004e-05, "loss": 98.0075, "step": 1491, "task_loss": 1.0674598217010498 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8924969391928907, "compression/movement_sparsity/importance_threshold": -0.0019075775307222215, "compression/movement_sparsity/linear_layer_sparsity": 0.48607699606750493, "compression/movement_sparsity/model_sparsity": 0.46937875590484107, "compression_loss": 96.01873016357422, "distillation_loss": 2.028595209121704, "epoch": 1.26, "learning_rate": 4.3693998309382924e-05, "loss": 98.1846, "step": 1492, "task_loss": 1.8315556049346924 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8945750550423769, "compression/movement_sparsity/importance_threshold": -0.001901052185655728, "compression/movement_sparsity/linear_layer_sparsity": 0.4879725452240751, "compression/movement_sparsity/model_sparsity": 0.47120918711649185, "compression_loss": 96.24153137207031, "distillation_loss": 2.286721706390381, "epoch": 1.26, "learning_rate": 4.368977176669485e-05, "loss": 98.6716, "step": 1493, "task_loss": 2.710057020187378 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8966484263371924, "compression/movement_sparsity/importance_threshold": -0.0018945417386308522, "compression/movement_sparsity/linear_layer_sparsity": 0.4897690760925972, "compression/movement_sparsity/model_sparsity": 0.47294400162290473, "compression_loss": 96.46377563476562, "distillation_loss": 2.9375553131103516, "epoch": 1.26, "learning_rate": 4.3685545224006764e-05, "loss": 99.0742, "step": 1494, "task_loss": 2.3950765132904053 }, { "compression/movement_sparsity/importance_regularization_factor": 0.8987170584996854, "compression/movement_sparsity/importance_threshold": -0.0018880461726212615, "compression/movement_sparsity/linear_layer_sparsity": 0.4915393976406556, "compression/movement_sparsity/model_sparsity": 0.4746535071796412, "compression_loss": 96.68563842773438, "distillation_loss": 1.6906776428222656, "epoch": 1.26, "learning_rate": 4.3681318681318683e-05, "loss": 98.3729, "step": 1495, "task_loss": 1.2968608140945435 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9007809569522041, "compression/movement_sparsity/importance_threshold": -0.0018815654706006227, "compression/movement_sparsity/linear_layer_sparsity": 0.49322533185435496, "compression/movement_sparsity/model_sparsity": 0.47628152436655974, "compression_loss": 96.9068603515625, "distillation_loss": 2.6485891342163086, "epoch": 1.26, "learning_rate": 4.36770921386306e-05, "loss": 99.156, "step": 1496, "task_loss": 1.5425375699996948 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9028401271170969, "compression/movement_sparsity/importance_threshold": -0.0018750996155426011, "compression/movement_sparsity/linear_layer_sparsity": 0.4950823182527908, "compression/movement_sparsity/model_sparsity": 0.47807471756945097, "compression_loss": 97.12759399414062, "distillation_loss": 2.005417823791504, "epoch": 1.27, "learning_rate": 4.3672865595942516e-05, "loss": 99.6693, "step": 1497, "task_loss": 0.7923761606216431 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9048945744167113, "compression/movement_sparsity/importance_threshold": -0.0018686485904208664, "compression/movement_sparsity/linear_layer_sparsity": 0.4969240774891557, "compression/movement_sparsity/model_sparsity": 0.4798532067101326, "compression_loss": 97.34791564941406, "distillation_loss": 2.7149128913879395, "epoch": 1.27, "learning_rate": 4.366863905325444e-05, "loss": 100.0436, "step": 1498, "task_loss": 1.74092435836792 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9069443042733967, "compression/movement_sparsity/importance_threshold": -0.0018622123782090815, "compression/movement_sparsity/linear_layer_sparsity": 0.49890451479512565, "compression/movement_sparsity/model_sparsity": 0.4817656099021047, "compression_loss": 97.56766510009766, "distillation_loss": 3.3202428817749023, "epoch": 1.27, "learning_rate": 4.366441251056636e-05, "loss": 100.1968, "step": 1499, "task_loss": 1.7851262092590332 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9089893221095, "compression/movement_sparsity/importance_threshold": -0.0018557909618809165, "compression/movement_sparsity/linear_layer_sparsity": 0.5006381456793685, "compression/movement_sparsity/model_sparsity": 0.4834396852322014, "compression_loss": 97.78693389892578, "distillation_loss": 2.261439800262451, "epoch": 1.27, "learning_rate": 4.3660185967878275e-05, "loss": 100.1552, "step": 1500, "task_loss": 2.0748744010925293 }, { "epoch": 1.27, "eval_accuracy": 0.7445148514851485, "eval_loss": 99.59747314453125, "eval_runtime": 376.0733, "eval_samples_per_second": 67.141, "eval_steps_per_second": 0.526, "step": 1500 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9110296333473702, "compression/movement_sparsity/importance_threshold": -0.0018493843244100365, "compression/movement_sparsity/linear_layer_sparsity": 0.5023965669081263, "compression/movement_sparsity/model_sparsity": 0.48513769928221495, "compression_loss": 98.0056381225586, "distillation_loss": 2.453676700592041, "epoch": 1.27, "learning_rate": 4.3655959425190195e-05, "loss": 100.7752, "step": 1501, "task_loss": 1.9804919958114624 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9130652434093551, "compression/movement_sparsity/importance_threshold": -0.0018429924487701086, "compression/movement_sparsity/linear_layer_sparsity": 0.5041724093458001, "compression/movement_sparsity/model_sparsity": 0.4868525360690243, "compression_loss": 98.22395324707031, "distillation_loss": 3.336181163787842, "epoch": 1.27, "learning_rate": 4.3651732882502115e-05, "loss": 100.9461, "step": 1502, "task_loss": 1.8443694114685059 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9150961577178031, "compression/movement_sparsity/importance_threshold": -0.0018366153179347998, "compression/movement_sparsity/linear_layer_sparsity": 0.5058707566180084, "compression/movement_sparsity/model_sparsity": 0.48849253988770497, "compression_loss": 98.44169616699219, "distillation_loss": 1.8525476455688477, "epoch": 1.27, "learning_rate": 4.3647506339814035e-05, "loss": 100.3817, "step": 1503, "task_loss": 0.7774598002433777 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9171223816950625, "compression/movement_sparsity/importance_threshold": -0.0018302529148777756, "compression/movement_sparsity/linear_layer_sparsity": 0.5078079568921308, "compression/movement_sparsity/model_sparsity": 0.49036319137288603, "compression_loss": 98.65892791748047, "distillation_loss": 2.2874534130096436, "epoch": 1.27, "learning_rate": 4.3643279797125954e-05, "loss": 101.0286, "step": 1504, "task_loss": 0.7870293259620667 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9191439207634815, "compression/movement_sparsity/importance_threshold": -0.0018239052225727031, "compression/movement_sparsity/linear_layer_sparsity": 0.5097417468543092, "compression/movement_sparsity/model_sparsity": 0.4922305497008299, "compression_loss": 98.87566375732422, "distillation_loss": 3.596014976501465, "epoch": 1.27, "learning_rate": 4.3639053254437874e-05, "loss": 101.5716, "step": 1505, "task_loss": 1.4280626773834229 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9211607803454076, "compression/movement_sparsity/importance_threshold": -0.0018175722239932514, "compression/movement_sparsity/linear_layer_sparsity": 0.5114834146275387, "compression/movement_sparsity/model_sparsity": 0.49391238582805214, "compression_loss": 99.09185028076172, "distillation_loss": 1.1857656240463257, "epoch": 1.27, "learning_rate": 4.3634826711749794e-05, "loss": 101.5815, "step": 1506, "task_loss": 1.2029460668563843 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9231729658631901, "compression/movement_sparsity/importance_threshold": -0.0018112539021130835, "compression/movement_sparsity/linear_layer_sparsity": 0.5132793731360141, "compression/movement_sparsity/model_sparsity": 0.4956466476367469, "compression_loss": 99.30754089355469, "distillation_loss": 3.269878387451172, "epoch": 1.27, "learning_rate": 4.363060016906171e-05, "loss": 101.7911, "step": 1507, "task_loss": 1.5143624544143677 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9251804827391765, "compression/movement_sparsity/importance_threshold": -0.0018049502399058682, "compression/movement_sparsity/linear_layer_sparsity": 0.5150177259906408, "compression/movement_sparsity/model_sparsity": 0.49732528272301824, "compression_loss": 99.52271270751953, "distillation_loss": 4.235038757324219, "epoch": 1.27, "learning_rate": 4.3626373626373626e-05, "loss": 102.5151, "step": 1508, "task_loss": 3.344265937805176 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9271833363957147, "compression/movement_sparsity/importance_threshold": -0.0017986612203452733, "compression/movement_sparsity/linear_layer_sparsity": 0.5166853685311867, "compression/movement_sparsity/model_sparsity": 0.49893563661202794, "compression_loss": 99.73743438720703, "distillation_loss": 2.83672833442688, "epoch": 1.28, "learning_rate": 4.3622147083685546e-05, "loss": 102.6316, "step": 1509, "task_loss": 1.4607300758361816 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9291815322551539, "compression/movement_sparsity/importance_threshold": -0.0017923868264049627, "compression/movement_sparsity/linear_layer_sparsity": 0.518119845897781, "compression/movement_sparsity/model_sparsity": 0.5003208352681112, "compression_loss": 99.95162200927734, "distillation_loss": 2.564958095550537, "epoch": 1.28, "learning_rate": 4.3617920540997466e-05, "loss": 102.647, "step": 1510, "task_loss": 2.469444990158081 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9311750757398416, "compression/movement_sparsity/importance_threshold": -0.0017861270410586046, "compression/movement_sparsity/linear_layer_sparsity": 0.5200264487577497, "compression/movement_sparsity/model_sparsity": 0.5021619404544435, "compression_loss": 100.16531372070312, "distillation_loss": 2.4818615913391113, "epoch": 1.28, "learning_rate": 4.3613693998309386e-05, "loss": 102.6456, "step": 1511, "task_loss": 1.5539356470108032 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9331639722721259, "compression/movement_sparsity/importance_threshold": -0.0017798818472798666, "compression/movement_sparsity/linear_layer_sparsity": 0.5218575000915776, "compression/movement_sparsity/model_sparsity": 0.5039300895419816, "compression_loss": 100.37849426269531, "distillation_loss": 2.1918630599975586, "epoch": 1.28, "learning_rate": 4.3609467455621305e-05, "loss": 102.4499, "step": 1512, "task_loss": 1.3998414278030396 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9351482272743554, "compression/movement_sparsity/importance_threshold": -0.0017736512280424137, "compression/movement_sparsity/linear_layer_sparsity": 0.5234306436036094, "compression/movement_sparsity/model_sparsity": 0.5054491907348195, "compression_loss": 100.59117889404297, "distillation_loss": 2.5820369720458984, "epoch": 1.28, "learning_rate": 4.360524091293322e-05, "loss": 103.1039, "step": 1513, "task_loss": 1.0723028182983398 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9371278461688781, "compression/movement_sparsity/importance_threshold": -0.001767435166319914, "compression/movement_sparsity/linear_layer_sparsity": 0.5252309663574396, "compression/movement_sparsity/model_sparsity": 0.5071876668636152, "compression_loss": 100.80329895019531, "distillation_loss": 4.318636417388916, "epoch": 1.28, "learning_rate": 4.360101437024514e-05, "loss": 104.475, "step": 1514, "task_loss": 1.7804205417633057 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9391028343780421, "compression/movement_sparsity/importance_threshold": -0.0017612336450860337, "compression/movement_sparsity/linear_layer_sparsity": 0.5269607457355361, "compression/movement_sparsity/model_sparsity": 0.5088580229986502, "compression_loss": 101.01500701904297, "distillation_loss": 3.642416000366211, "epoch": 1.28, "learning_rate": 4.3596787827557065e-05, "loss": 103.5358, "step": 1515, "task_loss": 1.6169562339782715 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9410731973241961, "compression/movement_sparsity/importance_threshold": -0.0017550466473144383, "compression/movement_sparsity/linear_layer_sparsity": 0.5290192744153533, "compression/movement_sparsity/model_sparsity": 0.5108458348855407, "compression_loss": 101.22613525390625, "distillation_loss": 3.3107755184173584, "epoch": 1.28, "learning_rate": 4.359256128486898e-05, "loss": 104.0058, "step": 1516, "task_loss": 2.4476919174194336 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9430389404296875, "compression/movement_sparsity/importance_threshold": -0.0017488741559787968, "compression/movement_sparsity/linear_layer_sparsity": 0.5309074544363246, "compression/movement_sparsity/model_sparsity": 0.5126691501140704, "compression_loss": 101.43675231933594, "distillation_loss": 1.8777174949645996, "epoch": 1.28, "learning_rate": 4.35883347421809e-05, "loss": 103.4714, "step": 1517, "task_loss": 0.9147438406944275 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9450000691168647, "compression/movement_sparsity/importance_threshold": -0.0017427161540527754, "compression/movement_sparsity/linear_layer_sparsity": 0.5325703630823191, "compression/movement_sparsity/model_sparsity": 0.5142749327323698, "compression_loss": 101.64681243896484, "distillation_loss": 3.4031283855438232, "epoch": 1.28, "learning_rate": 4.358410819949282e-05, "loss": 104.4431, "step": 1518, "task_loss": 1.4187113046646118 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9469565888080768, "compression/movement_sparsity/importance_threshold": -0.0017365726245100383, "compression/movement_sparsity/linear_layer_sparsity": 0.5342949792958293, "compression/movement_sparsity/model_sparsity": 0.5159403030734058, "compression_loss": 101.85639953613281, "distillation_loss": 3.1535377502441406, "epoch": 1.28, "learning_rate": 4.357988165680474e-05, "loss": 104.5497, "step": 1519, "task_loss": 1.2348978519439697 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9489085049256708, "compression/movement_sparsity/importance_threshold": -0.0017304435503242557, "compression/movement_sparsity/linear_layer_sparsity": 0.5360750071163433, "compression/movement_sparsity/model_sparsity": 0.5176591814622791, "compression_loss": 102.06546783447266, "distillation_loss": 2.6222758293151855, "epoch": 1.28, "learning_rate": 4.3575655114116657e-05, "loss": 105.0114, "step": 1520, "task_loss": 2.132873773574829 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9508558228919954, "compression/movement_sparsity/importance_threshold": -0.0017243289144690923, "compression/movement_sparsity/linear_layer_sparsity": 0.5379344141208092, "compression/movement_sparsity/model_sparsity": 0.5194547121159365, "compression_loss": 102.27413940429688, "distillation_loss": 3.047675848007202, "epoch": 1.29, "learning_rate": 4.3571428571428576e-05, "loss": 104.9919, "step": 1521, "task_loss": 2.442349433898926 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9527985481293989, "compression/movement_sparsity/importance_threshold": -0.0017182286999182154, "compression/movement_sparsity/linear_layer_sparsity": 0.5396139569806557, "compression/movement_sparsity/model_sparsity": 0.5210765575116693, "compression_loss": 102.4822998046875, "distillation_loss": 2.3306033611297607, "epoch": 1.29, "learning_rate": 4.3567202028740496e-05, "loss": 105.1335, "step": 1522, "task_loss": 1.3816114664077759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9547366860602291, "compression/movement_sparsity/importance_threshold": -0.0017121428896452922, "compression/movement_sparsity/linear_layer_sparsity": 0.5412530769122168, "compression/movement_sparsity/model_sparsity": 0.5226593686310583, "compression_loss": 102.68992614746094, "distillation_loss": 2.8695499897003174, "epoch": 1.29, "learning_rate": 4.356297548605241e-05, "loss": 105.2621, "step": 1523, "task_loss": 1.7741518020629883 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9566702421068347, "compression/movement_sparsity/importance_threshold": -0.0017060714666239881, "compression/movement_sparsity/linear_layer_sparsity": 0.5430503351549646, "compression/movement_sparsity/model_sparsity": 0.5243948855241547, "compression_loss": 102.89703369140625, "distillation_loss": 2.940119504928589, "epoch": 1.29, "learning_rate": 4.355874894336433e-05, "loss": 105.4771, "step": 1524, "task_loss": 1.316749930381775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9585992216915636, "compression/movement_sparsity/importance_threshold": -0.0017000144138279707, "compression/movement_sparsity/linear_layer_sparsity": 0.5446679558122781, "compression/movement_sparsity/model_sparsity": 0.5259569359355063, "compression_loss": 103.10370635986328, "distillation_loss": 2.3594701290130615, "epoch": 1.29, "learning_rate": 4.355452240067625e-05, "loss": 105.4503, "step": 1525, "task_loss": 1.0633206367492676 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9605236302367641, "compression/movement_sparsity/importance_threshold": -0.001693971714230907, "compression/movement_sparsity/linear_layer_sparsity": 0.5464578926160976, "compression/movement_sparsity/model_sparsity": 0.5276853829036249, "compression_loss": 103.30982971191406, "distillation_loss": 2.6086926460266113, "epoch": 1.29, "learning_rate": 4.355029585798817e-05, "loss": 105.8107, "step": 1526, "task_loss": 1.6839449405670166 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9624434731647843, "compression/movement_sparsity/importance_threshold": -0.0016879433508064628, "compression/movement_sparsity/linear_layer_sparsity": 0.5484329640466313, "compression/movement_sparsity/model_sparsity": 0.5295926045544894, "compression_loss": 103.51546478271484, "distillation_loss": 3.4850447177886963, "epoch": 1.29, "learning_rate": 4.354606931530009e-05, "loss": 106.2459, "step": 1527, "task_loss": 1.6158456802368164 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9643587558979725, "compression/movement_sparsity/importance_threshold": -0.0016819293065283053, "compression/movement_sparsity/linear_layer_sparsity": 0.5500547343142821, "compression/movement_sparsity/model_sparsity": 0.5311586620242975, "compression_loss": 103.72052764892578, "distillation_loss": 3.682271718978882, "epoch": 1.29, "learning_rate": 4.354184277261201e-05, "loss": 107.3768, "step": 1528, "task_loss": 1.9275528192520142 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9662694838586766, "compression/movement_sparsity/importance_threshold": -0.0016759295643701023, "compression/movement_sparsity/linear_layer_sparsity": 0.5517485861752918, "compression/movement_sparsity/model_sparsity": 0.5327943248629836, "compression_loss": 103.92520141601562, "distillation_loss": 2.254791259765625, "epoch": 1.29, "learning_rate": 4.353761622992392e-05, "loss": 106.4485, "step": 1529, "task_loss": 2.2280333042144775 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9681756624692455, "compression/movement_sparsity/importance_threshold": -0.0016699441073055182, "compression/movement_sparsity/linear_layer_sparsity": 0.5536855837385642, "compression/movement_sparsity/model_sparsity": 0.5346647806010563, "compression_loss": 104.12935638427734, "distillation_loss": 3.466214656829834, "epoch": 1.29, "learning_rate": 4.353338968723584e-05, "loss": 106.8591, "step": 1530, "task_loss": 1.6891735792160034 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9700772971520265, "compression/movement_sparsity/importance_threshold": -0.001663972918308222, "compression/movement_sparsity/linear_layer_sparsity": 0.5556070918080778, "compression/movement_sparsity/model_sparsity": 0.5365202789571317, "compression_loss": 104.33299255371094, "distillation_loss": 2.9059624671936035, "epoch": 1.29, "learning_rate": 4.352916314454776e-05, "loss": 107.7146, "step": 1531, "task_loss": 1.1788159608840942 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9719743933293683, "compression/movement_sparsity/importance_threshold": -0.0016580159803518792, "compression/movement_sparsity/linear_layer_sparsity": 0.557376697906078, "compression/movement_sparsity/model_sparsity": 0.5382290936417206, "compression_loss": 104.53616333007812, "distillation_loss": 3.8276565074920654, "epoch": 1.29, "learning_rate": 4.352493660185968e-05, "loss": 107.5411, "step": 1532, "task_loss": 2.6111137866973877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9738669564236192, "compression/movement_sparsity/importance_threshold": -0.0016520732764101565, "compression/movement_sparsity/linear_layer_sparsity": 0.5592526675793902, "compression/movement_sparsity/model_sparsity": 0.5400406179855967, "compression_loss": 104.73873138427734, "distillation_loss": 2.8210947513580322, "epoch": 1.3, "learning_rate": 4.35207100591716e-05, "loss": 107.4569, "step": 1533, "task_loss": 1.7577792406082153 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9757549918571271, "compression/movement_sparsity/importance_threshold": -0.001646144789456721, "compression/movement_sparsity/linear_layer_sparsity": 0.5609809922090351, "compression/movement_sparsity/model_sparsity": 0.5417095693472648, "compression_loss": 104.9408950805664, "distillation_loss": 3.0811879634857178, "epoch": 1.3, "learning_rate": 4.351648351648352e-05, "loss": 107.7517, "step": 1534, "task_loss": 2.302928924560547 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9776385050522403, "compression/movement_sparsity/importance_threshold": -0.0016402305024652395, "compression/movement_sparsity/linear_layer_sparsity": 0.5628178625366692, "compression/movement_sparsity/model_sparsity": 0.5434833375282706, "compression_loss": 105.142578125, "distillation_loss": 2.310521125793457, "epoch": 1.3, "learning_rate": 4.351225697379544e-05, "loss": 107.4567, "step": 1535, "task_loss": 0.9698376655578613 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9795175014313072, "compression/movement_sparsity/importance_threshold": -0.0016343303984093773, "compression/movement_sparsity/linear_layer_sparsity": 0.56440013996111, "compression/movement_sparsity/model_sparsity": 0.5450112588555273, "compression_loss": 105.34381866455078, "distillation_loss": 2.731828212738037, "epoch": 1.3, "learning_rate": 4.350803043110735e-05, "loss": 108.147, "step": 1536, "task_loss": 1.5721303224563599 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9813919864166756, "compression/movement_sparsity/importance_threshold": -0.0016284444602628037, "compression/movement_sparsity/linear_layer_sparsity": 0.5660561683623787, "compression/movement_sparsity/model_sparsity": 0.5466103975866731, "compression_loss": 105.54450225830078, "distillation_loss": 3.9375834465026855, "epoch": 1.3, "learning_rate": 4.350380388841928e-05, "loss": 109.001, "step": 1537, "task_loss": 2.6489920616149902 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9832619654306939, "compression/movement_sparsity/importance_threshold": -0.0016225726709991838, "compression/movement_sparsity/linear_layer_sparsity": 0.567661256719507, "compression/movement_sparsity/model_sparsity": 0.5481603462209046, "compression_loss": 105.74466705322266, "distillation_loss": 5.179448127746582, "epoch": 1.3, "learning_rate": 4.34995773457312e-05, "loss": 109.7356, "step": 1538, "task_loss": 2.8491363525390625 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9851274438957104, "compression/movement_sparsity/importance_threshold": -0.0016167150135921846, "compression/movement_sparsity/linear_layer_sparsity": 0.5691827566615077, "compression/movement_sparsity/model_sparsity": 0.5496295779592164, "compression_loss": 105.94447326660156, "distillation_loss": 2.8202476501464844, "epoch": 1.3, "learning_rate": 4.349535080304311e-05, "loss": 108.5431, "step": 1539, "task_loss": 1.0403130054473877 }, { "compression/movement_sparsity/importance_regularization_factor": 0.986988427234073, "compression/movement_sparsity/importance_threshold": -0.001610871471015473, "compression/movement_sparsity/linear_layer_sparsity": 0.5710557691412462, "compression/movement_sparsity/model_sparsity": 0.5514382466982155, "compression_loss": 106.14372253417969, "distillation_loss": 3.666755199432373, "epoch": 1.3, "learning_rate": 4.349112426035503e-05, "loss": 109.5219, "step": 1540, "task_loss": 2.089750051498413 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9888449208681303, "compression/movement_sparsity/importance_threshold": -0.0016050420262427147, "compression/movement_sparsity/linear_layer_sparsity": 0.5728565211651113, "compression/movement_sparsity/model_sparsity": 0.5531771373502997, "compression_loss": 106.34252166748047, "distillation_loss": 4.183623790740967, "epoch": 1.3, "learning_rate": 4.348689771766695e-05, "loss": 109.7301, "step": 1541, "task_loss": 2.2211577892303467 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9906969302202301, "compression/movement_sparsity/importance_threshold": -0.001599226662247578, "compression/movement_sparsity/linear_layer_sparsity": 0.5744524159889923, "compression/movement_sparsity/model_sparsity": 0.5547182082774336, "compression_loss": 106.54082489013672, "distillation_loss": 3.5080056190490723, "epoch": 1.3, "learning_rate": 4.348267117497887e-05, "loss": 109.8182, "step": 1542, "task_loss": 2.497711658477783 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9925444607127207, "compression/movement_sparsity/importance_threshold": -0.001593425362003729, "compression/movement_sparsity/linear_layer_sparsity": 0.5760025816299899, "compression/movement_sparsity/model_sparsity": 0.5562151209597955, "compression_loss": 106.7386703491211, "distillation_loss": 6.2112836837768555, "epoch": 1.3, "learning_rate": 4.347844463229079e-05, "loss": 110.4295, "step": 1543, "task_loss": 2.7345328330993652 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9943875177679506, "compression/movement_sparsity/importance_threshold": -0.0015876381084848337, "compression/movement_sparsity/linear_layer_sparsity": 0.5776402825856022, "compression/movement_sparsity/model_sparsity": 0.5577965618494252, "compression_loss": 106.93601989746094, "distillation_loss": 4.030116081237793, "epoch": 1.3, "learning_rate": 4.347421808960271e-05, "loss": 110.2622, "step": 1544, "task_loss": 2.91162109375 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9962261068082677, "compression/movement_sparsity/importance_threshold": -0.0015818648846645596, "compression/movement_sparsity/linear_layer_sparsity": 0.5792424972183302, "compression/movement_sparsity/model_sparsity": 0.5593437354805302, "compression_loss": 107.13289642333984, "distillation_loss": 3.9510879516601562, "epoch": 1.31, "learning_rate": 4.346999154691462e-05, "loss": 110.1842, "step": 1545, "task_loss": 1.7879835367202759 }, { "compression/movement_sparsity/importance_regularization_factor": 0.99806023325602, "compression/movement_sparsity/importance_threshold": -0.001576105673516574, "compression/movement_sparsity/linear_layer_sparsity": 0.5809453995225754, "compression/movement_sparsity/model_sparsity": 0.5609881378518844, "compression_loss": 107.32929229736328, "distillation_loss": 3.6610336303710938, "epoch": 1.31, "learning_rate": 4.346576500422654e-05, "loss": 110.3642, "step": 1546, "task_loss": 1.9599016904830933 }, { "compression/movement_sparsity/importance_regularization_factor": 0.9998899025335564, "compression/movement_sparsity/importance_threshold": -0.001570360458014542, "compression/movement_sparsity/linear_layer_sparsity": 0.5826692048926863, "compression/movement_sparsity/model_sparsity": 0.5626527252044864, "compression_loss": 107.52520751953125, "distillation_loss": 5.01436710357666, "epoch": 1.31, "learning_rate": 4.346153846153846e-05, "loss": 110.9813, "step": 1547, "task_loss": 2.7271504402160645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0017151200632244, "compression/movement_sparsity/importance_threshold": -0.0015646292211321314, "compression/movement_sparsity/linear_layer_sparsity": 0.5841610136572739, "compression/movement_sparsity/model_sparsity": 0.5640932857486698, "compression_loss": 107.72068786621094, "distillation_loss": 3.9223690032958984, "epoch": 1.31, "learning_rate": 4.345731191885038e-05, "loss": 110.7925, "step": 1548, "task_loss": 2.685188055038452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0035358912673722, "compression/movement_sparsity/importance_threshold": -0.0015589119458430094, "compression/movement_sparsity/linear_layer_sparsity": 0.585885856429969, "compression/movement_sparsity/model_sparsity": 0.5657588748658858, "compression_loss": 107.91567993164062, "distillation_loss": 2.9874980449676514, "epoch": 1.31, "learning_rate": 4.34530853761623e-05, "loss": 110.6391, "step": 1549, "task_loss": 1.3144748210906982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0053522215683484, "compression/movement_sparsity/importance_threshold": -0.0015532086151208413, "compression/movement_sparsity/linear_layer_sparsity": 0.587547477265859, "compression/movement_sparsity/model_sparsity": 0.5673634139143193, "compression_loss": 108.11019134521484, "distillation_loss": 3.407069206237793, "epoch": 1.31, "learning_rate": 4.344885883347422e-05, "loss": 111.0971, "step": 1550, "task_loss": 2.027880907058716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0071641163885015, "compression/movement_sparsity/importance_threshold": -0.0015475192119392936, "compression/movement_sparsity/linear_layer_sparsity": 0.5890288404595975, "compression/movement_sparsity/model_sparsity": 0.5687938877251467, "compression_loss": 108.30413055419922, "distillation_loss": 4.466436386108398, "epoch": 1.31, "learning_rate": 4.344463229078614e-05, "loss": 111.9754, "step": 1551, "task_loss": 2.2840769290924072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0089715811501785, "compression/movement_sparsity/importance_threshold": -0.0015418437192720358, "compression/movement_sparsity/linear_layer_sparsity": 0.5905933747225962, "compression/movement_sparsity/model_sparsity": 0.570304675423141, "compression_loss": 108.49762725830078, "distillation_loss": 4.507117748260498, "epoch": 1.31, "learning_rate": 4.3440405748098054e-05, "loss": 111.8826, "step": 1552, "task_loss": 1.8480420112609863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.010774621275729, "compression/movement_sparsity/importance_threshold": -0.0015361821200927314, "compression/movement_sparsity/linear_layer_sparsity": 0.5921335002144442, "compression/movement_sparsity/model_sparsity": 0.5717918928663639, "compression_loss": 108.69068145751953, "distillation_loss": 3.8605480194091797, "epoch": 1.31, "learning_rate": 4.3436179205409974e-05, "loss": 112.4289, "step": 1553, "task_loss": 2.2457542419433594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0125732421875, "compression/movement_sparsity/importance_threshold": -0.0015305343973750496, "compression/movement_sparsity/linear_layer_sparsity": 0.59368987834678, "compression/movement_sparsity/model_sparsity": 0.5732948046218748, "compression_loss": 108.88323974609375, "distillation_loss": 3.5598528385162354, "epoch": 1.31, "learning_rate": 4.34319526627219e-05, "loss": 112.2562, "step": 1554, "task_loss": 2.8344852924346924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0143674493078403, "compression/movement_sparsity/importance_threshold": -0.0015249005340926564, "compression/movement_sparsity/linear_layer_sparsity": 0.595239865125263, "compression/movement_sparsity/model_sparsity": 0.5747915445861997, "compression_loss": 109.0752944946289, "distillation_loss": 3.137166976928711, "epoch": 1.31, "learning_rate": 4.3427726120033814e-05, "loss": 111.8134, "step": 1555, "task_loss": 1.3303834199905396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0161572480590984, "compression/movement_sparsity/importance_threshold": -0.0015192805132192163, "compression/movement_sparsity/linear_layer_sparsity": 0.5969486937408232, "compression/movement_sparsity/model_sparsity": 0.5764416696818438, "compression_loss": 109.26691436767578, "distillation_loss": 3.54176664352417, "epoch": 1.32, "learning_rate": 4.342349957734573e-05, "loss": 112.3213, "step": 1556, "task_loss": 2.223477363586426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0179426438636214, "compression/movement_sparsity/importance_threshold": -0.0015136743177283996, "compression/movement_sparsity/linear_layer_sparsity": 0.5986538139402486, "compression/movement_sparsity/model_sparsity": 0.5780882137568558, "compression_loss": 109.45797729492188, "distillation_loss": 3.852586507797241, "epoch": 1.32, "learning_rate": 4.341927303465765e-05, "loss": 112.5126, "step": 1557, "task_loss": 2.234609842300415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0197236421437588, "compression/movement_sparsity/importance_threshold": -0.0015080819305938703, "compression/movement_sparsity/linear_layer_sparsity": 0.6002549077012189, "compression/movement_sparsity/model_sparsity": 0.5796343050215962, "compression_loss": 109.64867401123047, "distillation_loss": 3.726283073425293, "epoch": 1.32, "learning_rate": 4.3415046491969566e-05, "loss": 112.9503, "step": 1558, "task_loss": 1.2220486402511597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0215002483218583, "compression/movement_sparsity/importance_threshold": -0.0015025033347892958, "compression/movement_sparsity/linear_layer_sparsity": 0.6018999897165979, "compression/movement_sparsity/model_sparsity": 0.5812228734088826, "compression_loss": 109.83879089355469, "distillation_loss": 3.8369719982147217, "epoch": 1.32, "learning_rate": 4.341081994928149e-05, "loss": 113.2569, "step": 1559, "task_loss": 3.256497859954834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0232724678202672, "compression/movement_sparsity/importance_threshold": -0.0014969385132883452, "compression/movement_sparsity/linear_layer_sparsity": 0.6035786024913689, "compression/movement_sparsity/model_sparsity": 0.5828438206708233, "compression_loss": 110.02848052978516, "distillation_loss": 4.360718727111816, "epoch": 1.32, "learning_rate": 4.340659340659341e-05, "loss": 114.0015, "step": 1560, "task_loss": 3.466217279434204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0250403060613351, "compression/movement_sparsity/importance_threshold": -0.001491387449064681, "compression/movement_sparsity/linear_layer_sparsity": 0.6052541388308897, "compression/movement_sparsity/model_sparsity": 0.584461797182529, "compression_loss": 110.21770477294922, "distillation_loss": 4.478077411651611, "epoch": 1.32, "learning_rate": 4.3402366863905325e-05, "loss": 114.0476, "step": 1561, "task_loss": 2.728201389312744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0268037684674094, "compression/movement_sparsity/importance_threshold": -0.0014858501250919727, "compression/movement_sparsity/linear_layer_sparsity": 0.60700488089569, "compression/movement_sparsity/model_sparsity": 0.5861523958714908, "compression_loss": 110.40650177001953, "distillation_loss": 2.6250550746917725, "epoch": 1.32, "learning_rate": 4.3398140321217245e-05, "loss": 113.4276, "step": 1562, "task_loss": 1.7377750873565674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0285628604608383, "compression/movement_sparsity/importance_threshold": -0.0014803265243438874, "compression/movement_sparsity/linear_layer_sparsity": 0.6086484008451087, "compression/movement_sparsity/model_sparsity": 0.5877394558545882, "compression_loss": 110.5948486328125, "distillation_loss": 3.2099967002868652, "epoch": 1.32, "learning_rate": 4.3393913778529165e-05, "loss": 114.2914, "step": 1563, "task_loss": 1.6697765588760376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0303175874639705, "compression/movement_sparsity/importance_threshold": -0.0014748166297940896, "compression/movement_sparsity/linear_layer_sparsity": 0.6102528214488493, "compression/movement_sparsity/model_sparsity": 0.5892887596748152, "compression_loss": 110.78262329101562, "distillation_loss": 2.9184906482696533, "epoch": 1.32, "learning_rate": 4.3389687235841084e-05, "loss": 113.6251, "step": 1564, "task_loss": 1.1743499040603638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0320679548991538, "compression/movement_sparsity/importance_threshold": -0.0014693204244162475, "compression/movement_sparsity/linear_layer_sparsity": 0.611732968377489, "compression/movement_sparsity/model_sparsity": 0.5907180590029916, "compression_loss": 110.97003936767578, "distillation_loss": 3.2068748474121094, "epoch": 1.32, "learning_rate": 4.3385460693153004e-05, "loss": 113.8487, "step": 1565, "task_loss": 1.4513297080993652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0338139681887362, "compression/movement_sparsity/importance_threshold": -0.0014638378911840289, "compression/movement_sparsity/linear_layer_sparsity": 0.6131744810029884, "compression/movement_sparsity/model_sparsity": 0.5921100512351937, "compression_loss": 111.15686798095703, "distillation_loss": 4.41751766204834, "epoch": 1.32, "learning_rate": 4.3381234150464924e-05, "loss": 114.8833, "step": 1566, "task_loss": 2.231030225753784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0355556327550666, "compression/movement_sparsity/importance_threshold": -0.0014583690130710973, "compression/movement_sparsity/linear_layer_sparsity": 0.6147882143816528, "compression/movement_sparsity/model_sparsity": 0.5936683479078763, "compression_loss": 111.34326934814453, "distillation_loss": 6.527504920959473, "epoch": 1.32, "learning_rate": 4.3377007607776844e-05, "loss": 114.8586, "step": 1567, "task_loss": 3.6513991355895996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0372929540204925, "compression/movement_sparsity/importance_threshold": -0.001452913773051122, "compression/movement_sparsity/linear_layer_sparsity": 0.616481553503454, "compression/movement_sparsity/model_sparsity": 0.5953035156215232, "compression_loss": 111.52920532226562, "distillation_loss": 4.529018402099609, "epoch": 1.33, "learning_rate": 4.3372781065088757e-05, "loss": 114.9943, "step": 1568, "task_loss": 2.6245908737182617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.039025937407362, "compression/movement_sparsity/importance_threshold": -0.0014474721540977701, "compression/movement_sparsity/linear_layer_sparsity": 0.6181607028657686, "compression/movement_sparsity/model_sparsity": 0.5969249810375746, "compression_loss": 111.71471405029297, "distillation_loss": 3.5183393955230713, "epoch": 1.33, "learning_rate": 4.3368554522400676e-05, "loss": 115.4276, "step": 1569, "task_loss": 1.5850911140441895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0407545883380243, "compression/movement_sparsity/importance_threshold": -0.0014420441391847063, "compression/movement_sparsity/linear_layer_sparsity": 0.6196274589541532, "compression/movement_sparsity/model_sparsity": 0.5983413495420536, "compression_loss": 111.8996810913086, "distillation_loss": 3.3578598499298096, "epoch": 1.33, "learning_rate": 4.3364327979712596e-05, "loss": 115.9626, "step": 1570, "task_loss": 1.2393653392791748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0424789122348264, "compression/movement_sparsity/importance_threshold": -0.0014366297112855986, "compression/movement_sparsity/linear_layer_sparsity": 0.6212466774499299, "compression/movement_sparsity/model_sparsity": 0.5999049429012018, "compression_loss": 112.084228515625, "distillation_loss": 4.579760551452637, "epoch": 1.33, "learning_rate": 4.3360101437024516e-05, "loss": 115.9855, "step": 1571, "task_loss": 2.1139378547668457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0441989145201171, "compression/movement_sparsity/importance_threshold": -0.0014312288533741132, "compression/movement_sparsity/linear_layer_sparsity": 0.6227893666378197, "compression/movement_sparsity/model_sparsity": 0.6013946359696205, "compression_loss": 112.2683334350586, "distillation_loss": 3.9380552768707275, "epoch": 1.33, "learning_rate": 4.3355874894336436e-05, "loss": 115.8131, "step": 1572, "task_loss": 1.858887791633606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.045914600616245, "compression/movement_sparsity/importance_threshold": -0.0014258415484239163, "compression/movement_sparsity/linear_layer_sparsity": 0.6243043202117885, "compression/movement_sparsity/model_sparsity": 0.6028575462277811, "compression_loss": 112.4519271850586, "distillation_loss": 3.7959330081939697, "epoch": 1.33, "learning_rate": 4.3351648351648355e-05, "loss": 116.1372, "step": 1573, "task_loss": 2.0476863384246826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0476259759455573, "compression/movement_sparsity/importance_threshold": -0.0014204677794086768, "compression/movement_sparsity/linear_layer_sparsity": 0.6256862716199468, "compression/movement_sparsity/model_sparsity": 0.6041920233536894, "compression_loss": 112.63510131835938, "distillation_loss": 2.433563470840454, "epoch": 1.33, "learning_rate": 4.334742180896027e-05, "loss": 114.9126, "step": 1574, "task_loss": 1.2678323984146118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0493330459304029, "compression/movement_sparsity/importance_threshold": -0.0014151075293020589, "compression/movement_sparsity/linear_layer_sparsity": 0.6272780168334905, "compression/movement_sparsity/model_sparsity": 0.6057290872223668, "compression_loss": 112.81768035888672, "distillation_loss": 3.221038579940796, "epoch": 1.33, "learning_rate": 4.334319526627219e-05, "loss": 116.8326, "step": 1575, "task_loss": 1.185097098350525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0510358159931297, "compression/movement_sparsity/importance_threshold": -0.001409760781077731, "compression/movement_sparsity/linear_layer_sparsity": 0.6285827354078714, "compression/movement_sparsity/model_sparsity": 0.6069889846999339, "compression_loss": 112.9998779296875, "distillation_loss": 5.349943161010742, "epoch": 1.33, "learning_rate": 4.3338968723584115e-05, "loss": 116.8133, "step": 1576, "task_loss": 2.0130677223205566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.052734291556086, "compression/movement_sparsity/importance_threshold": -0.0014044275177093595, "compression/movement_sparsity/linear_layer_sparsity": 0.6301701402243957, "compression/movement_sparsity/model_sparsity": 0.6085218572775821, "compression_loss": 113.18162536621094, "distillation_loss": 3.5576772689819336, "epoch": 1.33, "learning_rate": 4.3334742180896034e-05, "loss": 117.106, "step": 1577, "task_loss": 1.9461464881896973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0544284780416202, "compression/movement_sparsity/importance_threshold": -0.00139910772217061, "compression/movement_sparsity/linear_layer_sparsity": 0.6317991246134662, "compression/movement_sparsity/model_sparsity": 0.610094881041546, "compression_loss": 113.36290740966797, "distillation_loss": 3.3254685401916504, "epoch": 1.33, "learning_rate": 4.333051563820795e-05, "loss": 117.0264, "step": 1578, "task_loss": 1.3175159692764282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0561183808720802, "compression/movement_sparsity/importance_threshold": -0.00139380137743515, "compression/movement_sparsity/linear_layer_sparsity": 0.6332216062674189, "compression/movement_sparsity/model_sparsity": 0.61146849607462, "compression_loss": 113.54379272460938, "distillation_loss": 4.478581428527832, "epoch": 1.33, "learning_rate": 4.332628909551987e-05, "loss": 117.2172, "step": 1579, "task_loss": 2.7694849967956543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0578040054698143, "compression/movement_sparsity/importance_threshold": -0.0013885084664766473, "compression/movement_sparsity/linear_layer_sparsity": 0.6346106406311529, "compression/movement_sparsity/model_sparsity": 0.6128098128347902, "compression_loss": 113.72419738769531, "distillation_loss": 4.755603790283203, "epoch": 1.34, "learning_rate": 4.332206255283179e-05, "loss": 118.0979, "step": 1580, "task_loss": 2.0192434787750244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0594853572571705, "compression/movement_sparsity/importance_threshold": -0.001383228972268767, "compression/movement_sparsity/linear_layer_sparsity": 0.6360831918650085, "compression/movement_sparsity/model_sparsity": 0.6142317774036654, "compression_loss": 113.90418243408203, "distillation_loss": 3.7032461166381836, "epoch": 1.34, "learning_rate": 4.3317836010143706e-05, "loss": 117.6311, "step": 1581, "task_loss": 2.0278055667877197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0611624416564975, "compression/movement_sparsity/importance_threshold": -0.001377962877785177, "compression/movement_sparsity/linear_layer_sparsity": 0.6374225262980363, "compression/movement_sparsity/model_sparsity": 0.615525101578644, "compression_loss": 114.08367156982422, "distillation_loss": 4.694839954376221, "epoch": 1.34, "learning_rate": 4.3313609467455626e-05, "loss": 118.2582, "step": 1582, "task_loss": 2.7239973545074463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0628352640901428, "compression/movement_sparsity/importance_threshold": -0.0013727101659995432, "compression/movement_sparsity/linear_layer_sparsity": 0.63889513715273, "compression/movement_sparsity/model_sparsity": 0.6169471237201982, "compression_loss": 114.26268768310547, "distillation_loss": 3.1247453689575195, "epoch": 1.34, "learning_rate": 4.3309382924767546e-05, "loss": 118.0914, "step": 1583, "task_loss": 1.7471249103546143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0645038299804552, "compression/movement_sparsity/importance_threshold": -0.0013674708198855327, "compression/movement_sparsity/linear_layer_sparsity": 0.6405620880915532, "compression/movement_sparsity/model_sparsity": 0.6185568097661318, "compression_loss": 114.44129943847656, "distillation_loss": 3.806407928466797, "epoch": 1.34, "learning_rate": 4.330515638207946e-05, "loss": 118.4568, "step": 1584, "task_loss": 2.9354496002197266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0661681447497828, "compression/movement_sparsity/importance_threshold": -0.0013622448224168107, "compression/movement_sparsity/linear_layer_sparsity": 0.6419462096982212, "compression/movement_sparsity/model_sparsity": 0.6198933825375548, "compression_loss": 114.61946105957031, "distillation_loss": 3.8559744358062744, "epoch": 1.34, "learning_rate": 4.330092983939138e-05, "loss": 118.1757, "step": 1585, "task_loss": 1.7772661447525024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0678282138204733, "compression/movement_sparsity/importance_threshold": -0.0013570321565670473, "compression/movement_sparsity/linear_layer_sparsity": 0.6433519617449808, "compression/movement_sparsity/model_sparsity": 0.621250842676909, "compression_loss": 114.7971420288086, "distillation_loss": 5.358077526092529, "epoch": 1.34, "learning_rate": 4.32967032967033e-05, "loss": 118.7218, "step": 1586, "task_loss": 2.0680317878723145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0694840426148755, "compression/movement_sparsity/importance_threshold": -0.0013518328053099059, "compression/movement_sparsity/linear_layer_sparsity": 0.6448976081264443, "compression/movement_sparsity/model_sparsity": 0.6227433913502047, "compression_loss": 114.97431945800781, "distillation_loss": 3.985830783843994, "epoch": 1.34, "learning_rate": 4.329247675401522e-05, "loss": 118.1344, "step": 1587, "task_loss": 1.5103660821914673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0711356365553373, "compression/movement_sparsity/importance_threshold": -0.0013466467516190551, "compression/movement_sparsity/linear_layer_sparsity": 0.646425725981483, "compression/movement_sparsity/model_sparsity": 0.6242190136558825, "compression_loss": 115.15105438232422, "distillation_loss": 3.744576930999756, "epoch": 1.34, "learning_rate": 4.328825021132714e-05, "loss": 118.8127, "step": 1588, "task_loss": 1.76736319065094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0727830010642068, "compression/movement_sparsity/importance_threshold": -0.0013414739784681617, "compression/movement_sparsity/linear_layer_sparsity": 0.6479528779789433, "compression/movement_sparsity/model_sparsity": 0.625693703284161, "compression_loss": 115.32734680175781, "distillation_loss": 4.767174243927002, "epoch": 1.34, "learning_rate": 4.328402366863906e-05, "loss": 119.5958, "step": 1589, "task_loss": 1.9313116073608398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.074426141563833, "compression/movement_sparsity/importance_threshold": -0.0013363144688308898, "compression/movement_sparsity/linear_layer_sparsity": 0.6493796284849095, "compression/movement_sparsity/model_sparsity": 0.6270714405210494, "compression_loss": 115.50312042236328, "distillation_loss": 4.791862964630127, "epoch": 1.34, "learning_rate": 4.327979712595097e-05, "loss": 119.7996, "step": 1590, "task_loss": 3.1982789039611816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0760650634765625, "compression/movement_sparsity/importance_threshold": -0.0013311682056809104, "compression/movement_sparsity/linear_layer_sparsity": 0.6507805273954412, "compression/movement_sparsity/model_sparsity": 0.6284242142443351, "compression_loss": 115.67848205566406, "distillation_loss": 3.7637436389923096, "epoch": 1.34, "learning_rate": 4.327557058326289e-05, "loss": 119.6238, "step": 1591, "task_loss": 2.3045530319213867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0776997722247448, "compression/movement_sparsity/importance_threshold": -0.0013260351719918875, "compression/movement_sparsity/linear_layer_sparsity": 0.6523456459426541, "compression/movement_sparsity/model_sparsity": 0.6299355661545833, "compression_loss": 115.85338592529297, "distillation_loss": 2.95743465423584, "epoch": 1.35, "learning_rate": 4.327134404057481e-05, "loss": 119.8796, "step": 1592, "task_loss": 2.1221401691436768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.079330273230728, "compression/movement_sparsity/importance_threshold": -0.001320915350737487, "compression/movement_sparsity/linear_layer_sparsity": 0.6538129266944147, "compression/movement_sparsity/model_sparsity": 0.6313524412986374, "compression_loss": 116.02781677246094, "distillation_loss": 5.060418128967285, "epoch": 1.35, "learning_rate": 4.3267117497886737e-05, "loss": 119.7491, "step": 1593, "task_loss": 2.6820931434631348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0809565719168597, "compression/movement_sparsity/importance_threshold": -0.0013158087248913782, "compression/movement_sparsity/linear_layer_sparsity": 0.6554549799712142, "compression/movement_sparsity/model_sparsity": 0.6329380849938321, "compression_loss": 116.20174407958984, "distillation_loss": 2.4874887466430664, "epoch": 1.35, "learning_rate": 4.326289095519865e-05, "loss": 120.4098, "step": 1594, "task_loss": 2.1427178382873535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0825786737054883, "compression/movement_sparsity/importance_threshold": -0.0013107152774272262, "compression/movement_sparsity/linear_layer_sparsity": 0.6567134566235031, "compression/movement_sparsity/model_sparsity": 0.634153329101588, "compression_loss": 116.37520599365234, "distillation_loss": 3.4833521842956543, "epoch": 1.35, "learning_rate": 4.325866441251057e-05, "loss": 120.488, "step": 1595, "task_loss": 2.1483876705169678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0841965840189625, "compression/movement_sparsity/importance_threshold": -0.0013056349913186977, "compression/movement_sparsity/linear_layer_sparsity": 0.6581914810503036, "compression/movement_sparsity/model_sparsity": 0.6355805788423928, "compression_loss": 116.54830169677734, "distillation_loss": 4.280088901519775, "epoch": 1.35, "learning_rate": 4.325443786982249e-05, "loss": 120.4263, "step": 1596, "task_loss": 1.8133630752563477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0858103082796298, "compression/movement_sparsity/importance_threshold": -0.0013005678495394603, "compression/movement_sparsity/linear_layer_sparsity": 0.6596304657522643, "compression/movement_sparsity/model_sparsity": 0.6369701299930064, "compression_loss": 116.7208480834961, "distillation_loss": 4.008568286895752, "epoch": 1.35, "learning_rate": 4.32502113271344e-05, "loss": 120.6723, "step": 1597, "task_loss": 2.8913395404815674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0874198519098388, "compression/movement_sparsity/importance_threshold": -0.00129551383506318, "compression/movement_sparsity/linear_layer_sparsity": 0.6611544697694687, "compression/movement_sparsity/model_sparsity": 0.6384417797838351, "compression_loss": 116.89300537109375, "distillation_loss": 4.252841472625732, "epoch": 1.35, "learning_rate": 4.324598478444633e-05, "loss": 120.5307, "step": 1598, "task_loss": 2.6416594982147217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0890252203319377, "compression/movement_sparsity/importance_threshold": -0.0012904729308635234, "compression/movement_sparsity/linear_layer_sparsity": 0.6625170205568834, "compression/movement_sparsity/model_sparsity": 0.6397575227600056, "compression_loss": 117.06465911865234, "distillation_loss": 3.74229097366333, "epoch": 1.35, "learning_rate": 4.324175824175825e-05, "loss": 121.2333, "step": 1599, "task_loss": 2.898186683654785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0906264189682742, "compression/movement_sparsity/importance_threshold": -0.0012854451199141585, "compression/movement_sparsity/linear_layer_sparsity": 0.6637171522569302, "compression/movement_sparsity/model_sparsity": 0.6409164262441188, "compression_loss": 117.23584747314453, "distillation_loss": 4.659516334533691, "epoch": 1.35, "learning_rate": 4.323753169907016e-05, "loss": 120.671, "step": 1600, "task_loss": 2.0339272022247314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0922234532411974, "compression/movement_sparsity/importance_threshold": -0.0012804303851887493, "compression/movement_sparsity/linear_layer_sparsity": 0.6650139531840008, "compression/movement_sparsity/model_sparsity": 0.6421686780699183, "compression_loss": 117.40653991699219, "distillation_loss": 4.822331428527832, "epoch": 1.35, "learning_rate": 4.323330515638208e-05, "loss": 122.4939, "step": 1601, "task_loss": 2.14998197555542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0938163285730549, "compression/movement_sparsity/importance_threshold": -0.0012754287096609653, "compression/movement_sparsity/linear_layer_sparsity": 0.6665544840975485, "compression/movement_sparsity/model_sparsity": 0.6436562870073582, "compression_loss": 117.57678985595703, "distillation_loss": 5.297080039978027, "epoch": 1.35, "learning_rate": 4.3229078613694e-05, "loss": 121.7672, "step": 1602, "task_loss": 3.3388876914978027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0954050503861945, "compression/movement_sparsity/importance_threshold": -0.001270440076304473, "compression/movement_sparsity/linear_layer_sparsity": 0.6678812027612175, "compression/movement_sparsity/model_sparsity": 0.6449374288034662, "compression_loss": 117.74663543701172, "distillation_loss": 4.490848064422607, "epoch": 1.35, "learning_rate": 4.3224852071005914e-05, "loss": 121.8935, "step": 1603, "task_loss": 1.8725887537002563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.096989624102965, "compression/movement_sparsity/importance_threshold": -0.0012654644680929377, "compression/movement_sparsity/linear_layer_sparsity": 0.6692411183075848, "compression/movement_sparsity/model_sparsity": 0.6462506270672262, "compression_loss": 117.91603088378906, "distillation_loss": 3.412388801574707, "epoch": 1.36, "learning_rate": 4.322062552831784e-05, "loss": 122.3655, "step": 1604, "task_loss": 2.1094701290130615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.0985700551457152, "compression/movement_sparsity/importance_threshold": -0.0012605018680000258, "compression/movement_sparsity/linear_layer_sparsity": 0.6707111177695663, "compression/movement_sparsity/model_sparsity": 0.6476701275254414, "compression_loss": 118.08499908447266, "distillation_loss": 3.7808332443237305, "epoch": 1.36, "learning_rate": 4.321639898562976e-05, "loss": 122.0265, "step": 1605, "task_loss": 2.498356819152832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1001463489367915, "compression/movement_sparsity/importance_threshold": -0.0012555522589994066, "compression/movement_sparsity/linear_layer_sparsity": 0.6719697255876993, "compression/movement_sparsity/model_sparsity": 0.648885498293091, "compression_loss": 118.25349426269531, "distillation_loss": 3.7680885791778564, "epoch": 1.36, "learning_rate": 4.321217244294168e-05, "loss": 122.624, "step": 1606, "task_loss": 2.3853378295898438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.101718510898544, "compression/movement_sparsity/importance_threshold": -0.001250615624064744, "compression/movement_sparsity/linear_layer_sparsity": 0.6733138296877814, "compression/movement_sparsity/model_sparsity": 0.6501834282823874, "compression_loss": 118.42152404785156, "distillation_loss": 5.073239803314209, "epoch": 1.36, "learning_rate": 4.320794590025359e-05, "loss": 122.7151, "step": 1607, "task_loss": 2.7709081172943115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1032865464533197, "compression/movement_sparsity/importance_threshold": -0.0012456919461697061, "compression/movement_sparsity/linear_layer_sparsity": 0.6748297610434963, "compression/movement_sparsity/model_sparsity": 0.6516472827324832, "compression_loss": 118.58906555175781, "distillation_loss": 5.1621527671813965, "epoch": 1.36, "learning_rate": 4.320371935756551e-05, "loss": 122.771, "step": 1608, "task_loss": 2.8966009616851807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1048504610234668, "compression/movement_sparsity/importance_threshold": -0.00124078120828796, "compression/movement_sparsity/linear_layer_sparsity": 0.6759656334041534, "compression/movement_sparsity/model_sparsity": 0.6527441343831996, "compression_loss": 118.75621795654297, "distillation_loss": 4.108824253082275, "epoch": 1.36, "learning_rate": 4.319949281487743e-05, "loss": 123.11, "step": 1609, "task_loss": 2.221781015396118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1064102600313346, "compression/movement_sparsity/importance_threshold": -0.001235883393393171, "compression/movement_sparsity/linear_layer_sparsity": 0.6774813143523479, "compression/movement_sparsity/model_sparsity": 0.6542077470280436, "compression_loss": 118.92295837402344, "distillation_loss": 5.164173603057861, "epoch": 1.36, "learning_rate": 4.319526627218935e-05, "loss": 122.8529, "step": 1610, "task_loss": 2.004044532775879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1079659488992701, "compression/movement_sparsity/importance_threshold": -0.0012309984844590073, "compression/movement_sparsity/linear_layer_sparsity": 0.6790146430686436, "compression/movement_sparsity/model_sparsity": 0.6556884011858636, "compression_loss": 119.08917236328125, "distillation_loss": 7.246700763702393, "epoch": 1.36, "learning_rate": 4.319103972950127e-05, "loss": 123.8255, "step": 1611, "task_loss": 4.234955787658691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1095175330496219, "compression/movement_sparsity/importance_threshold": -0.001226126464459135, "compression/movement_sparsity/linear_layer_sparsity": 0.6804013641438562, "compression/movement_sparsity/model_sparsity": 0.6570274841260897, "compression_loss": 119.25492858886719, "distillation_loss": 3.6851930618286133, "epoch": 1.36, "learning_rate": 4.318681318681319e-05, "loss": 123.2515, "step": 1612, "task_loss": 3.4537675380706787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1110650179047383, "compression/movement_sparsity/importance_threshold": -0.0012212673163672205, "compression/movement_sparsity/linear_layer_sparsity": 0.6816465572733988, "compression/movement_sparsity/model_sparsity": 0.6582299010409705, "compression_loss": 119.42024230957031, "distillation_loss": 2.9697279930114746, "epoch": 1.36, "learning_rate": 4.3182586644125104e-05, "loss": 123.0545, "step": 1613, "task_loss": 1.1787941455841064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1126084088869674, "compression/movement_sparsity/importance_threshold": -0.0012164210231569313, "compression/movement_sparsity/linear_layer_sparsity": 0.682951788586988, "compression/movement_sparsity/model_sparsity": 0.6594902936435767, "compression_loss": 119.5850601196289, "distillation_loss": 4.07765531539917, "epoch": 1.36, "learning_rate": 4.3178360101437024e-05, "loss": 124.2977, "step": 1614, "task_loss": 2.54297137260437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1141477114186573, "compression/movement_sparsity/importance_threshold": -0.0012115875678019332, "compression/movement_sparsity/linear_layer_sparsity": 0.6842453342162941, "compression/movement_sparsity/model_sparsity": 0.6607394020011043, "compression_loss": 119.74951171875, "distillation_loss": 4.686488151550293, "epoch": 1.36, "learning_rate": 4.317413355874895e-05, "loss": 123.9731, "step": 1615, "task_loss": 2.6008150577545166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1156829309221568, "compression/movement_sparsity/importance_threshold": -0.0012067669332758934, "compression/movement_sparsity/linear_layer_sparsity": 0.6855348733252745, "compression/movement_sparsity/model_sparsity": 0.6619846414746049, "compression_loss": 119.91354370117188, "distillation_loss": 2.927152156829834, "epoch": 1.37, "learning_rate": 4.3169907016060863e-05, "loss": 124.3327, "step": 1616, "task_loss": 1.656904697418213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.117214072819813, "compression/movement_sparsity/importance_threshold": -0.0012019591025524794, "compression/movement_sparsity/linear_layer_sparsity": 0.6869271749109408, "compression/movement_sparsity/model_sparsity": 0.6633291132175828, "compression_loss": 120.07717895507812, "distillation_loss": 4.406262397766113, "epoch": 1.37, "learning_rate": 4.316568047337278e-05, "loss": 123.9187, "step": 1617, "task_loss": 2.220959186553955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.118741142533975, "compression/movement_sparsity/importance_threshold": -0.001197164058605356, "compression/movement_sparsity/linear_layer_sparsity": 0.6880727177715505, "compression/movement_sparsity/model_sparsity": 0.6644353031568286, "compression_loss": 120.24034118652344, "distillation_loss": 4.4984517097473145, "epoch": 1.37, "learning_rate": 4.31614539306847e-05, "loss": 124.8044, "step": 1618, "task_loss": 1.910883903503418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1202641454869908, "compression/movement_sparsity/importance_threshold": -0.0011923817844081913, "compression/movement_sparsity/linear_layer_sparsity": 0.689435280483133, "compression/movement_sparsity/model_sparsity": 0.665751057647535, "compression_loss": 120.4030532836914, "distillation_loss": 5.075961112976074, "epoch": 1.37, "learning_rate": 4.3157227387996616e-05, "loss": 124.3477, "step": 1619, "task_loss": 2.0177574157714844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1217830871012082, "compression/movement_sparsity/importance_threshold": -0.0011876122629346526, "compression/movement_sparsity/linear_layer_sparsity": 0.6906907283968424, "compression/movement_sparsity/model_sparsity": 0.666963377063199, "compression_loss": 120.5652847290039, "distillation_loss": 3.847567081451416, "epoch": 1.37, "learning_rate": 4.3153000845308536e-05, "loss": 125.3068, "step": 1620, "task_loss": 0.9891605377197266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1232979727989758, "compression/movement_sparsity/importance_threshold": -0.0011828554771584053, "compression/movement_sparsity/linear_layer_sparsity": 0.6920094101715248, "compression/movement_sparsity/model_sparsity": 0.6682367580621814, "compression_loss": 120.72709655761719, "distillation_loss": 4.334209442138672, "epoch": 1.37, "learning_rate": 4.314877430262046e-05, "loss": 125.268, "step": 1621, "task_loss": 3.0601370334625244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1248088080026417, "compression/movement_sparsity/importance_threshold": -0.0011781114100531164, "compression/movement_sparsity/linear_layer_sparsity": 0.6931947797520384, "compression/movement_sparsity/model_sparsity": 0.669381406550981, "compression_loss": 120.88846588134766, "distillation_loss": 5.940729141235352, "epoch": 1.37, "learning_rate": 4.314454775993238e-05, "loss": 125.5652, "step": 1622, "task_loss": 2.800605535507202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.126315598134554, "compression/movement_sparsity/importance_threshold": -0.0011733800445924533, "compression/movement_sparsity/linear_layer_sparsity": 0.694547481176986, "compression/movement_sparsity/model_sparsity": 0.6706876385205852, "compression_loss": 121.04940795898438, "distillation_loss": 3.9292197227478027, "epoch": 1.37, "learning_rate": 4.3140321217244295e-05, "loss": 125.5798, "step": 1623, "task_loss": 2.5937817096710205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.127818348617061, "compression/movement_sparsity/importance_threshold": -0.0011686613637500818, "compression/movement_sparsity/linear_layer_sparsity": 0.6956885643988998, "compression/movement_sparsity/model_sparsity": 0.6717895220234439, "compression_loss": 121.20983123779297, "distillation_loss": 5.505153656005859, "epoch": 1.37, "learning_rate": 4.3136094674556215e-05, "loss": 126.286, "step": 1624, "task_loss": 3.20289945602417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.129317064872511, "compression/movement_sparsity/importance_threshold": -0.0011639553504996692, "compression/movement_sparsity/linear_layer_sparsity": 0.6968212649309657, "compression/movement_sparsity/model_sparsity": 0.6728833108076389, "compression_loss": 121.3698959350586, "distillation_loss": 3.1672205924987793, "epoch": 1.37, "learning_rate": 4.3131868131868134e-05, "loss": 125.4565, "step": 1625, "task_loss": 1.7627214193344116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.130811752323252, "compression/movement_sparsity/importance_threshold": -0.0011592619878148828, "compression/movement_sparsity/linear_layer_sparsity": 0.6981937962466918, "compression/movement_sparsity/model_sparsity": 0.6742086914502695, "compression_loss": 121.52953338623047, "distillation_loss": 4.028879165649414, "epoch": 1.37, "learning_rate": 4.3127641589180054e-05, "loss": 125.9492, "step": 1626, "task_loss": 1.833629846572876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1323024163916324, "compression/movement_sparsity/importance_threshold": -0.0011545812586693871, "compression/movement_sparsity/linear_layer_sparsity": 0.6994968692859389, "compression/movement_sparsity/model_sparsity": 0.6754669999218968, "compression_loss": 121.68870544433594, "distillation_loss": 5.096287727355957, "epoch": 1.38, "learning_rate": 4.3123415046491974e-05, "loss": 126.73, "step": 1627, "task_loss": 2.98466157913208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1337890625, "compression/movement_sparsity/importance_threshold": -0.0011499131460368517, "compression/movement_sparsity/linear_layer_sparsity": 0.7007197522978348, "compression/movement_sparsity/model_sparsity": 0.676647873140306, "compression_loss": 121.8475112915039, "distillation_loss": 4.1848344802856445, "epoch": 1.38, "learning_rate": 4.3119188503803893e-05, "loss": 125.5736, "step": 1628, "task_loss": 2.1423747539520264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1352716960707032, "compression/movement_sparsity/importance_threshold": -0.0011452576328909421, "compression/movement_sparsity/linear_layer_sparsity": 0.7016915361876547, "compression/movement_sparsity/model_sparsity": 0.6775862732639537, "compression_loss": 122.00588989257812, "distillation_loss": 4.5834574699401855, "epoch": 1.38, "learning_rate": 4.3114961961115806e-05, "loss": 126.795, "step": 1629, "task_loss": 2.2282190322875977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1367503225260904, "compression/movement_sparsity/importance_threshold": -0.0011406147022053237, "compression/movement_sparsity/linear_layer_sparsity": 0.7031004362146701, "compression/movement_sparsity/model_sparsity": 0.6789467732407577, "compression_loss": 122.16371154785156, "distillation_loss": 5.430883407592773, "epoch": 1.38, "learning_rate": 4.3110735418427726e-05, "loss": 126.6811, "step": 1630, "task_loss": 2.839416027069092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1382249472885098, "compression/movement_sparsity/importance_threshold": -0.001135984336953665, "compression/movement_sparsity/linear_layer_sparsity": 0.7045009893243405, "compression/movement_sparsity/model_sparsity": 0.6802992130425055, "compression_loss": 122.32119750976562, "distillation_loss": 5.543839454650879, "epoch": 1.38, "learning_rate": 4.3106508875739646e-05, "loss": 127.0583, "step": 1631, "task_loss": 3.2805869579315186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1396955757803091, "compression/movement_sparsity/importance_threshold": -0.0011313665201096328, "compression/movement_sparsity/linear_layer_sparsity": 0.7058499704089856, "compression/movement_sparsity/model_sparsity": 0.6816018524769419, "compression_loss": 122.47811889648438, "distillation_loss": 4.163388729095459, "epoch": 1.38, "learning_rate": 4.3102282333051566e-05, "loss": 126.7668, "step": 1632, "task_loss": 2.362947702407837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1411622134238373, "compression/movement_sparsity/importance_threshold": -0.0011267612346468918, "compression/movement_sparsity/linear_layer_sparsity": 0.7071779768827593, "compression/movement_sparsity/model_sparsity": 0.6828842378429156, "compression_loss": 122.63465881347656, "distillation_loss": 5.2354655265808105, "epoch": 1.38, "learning_rate": 4.3098055790363485e-05, "loss": 127.0408, "step": 1633, "task_loss": 2.386781930923462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1426248656414415, "compression/movement_sparsity/importance_threshold": -0.0011221684635391118, "compression/movement_sparsity/linear_layer_sparsity": 0.7083952555358664, "compression/movement_sparsity/model_sparsity": 0.6840596992295014, "compression_loss": 122.79078674316406, "distillation_loss": 6.934826374053955, "epoch": 1.38, "learning_rate": 4.3093829247675405e-05, "loss": 127.7497, "step": 1634, "task_loss": 3.19840931892395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1440835378554708, "compression/movement_sparsity/importance_threshold": -0.0011175881897599565, "compression/movement_sparsity/linear_layer_sparsity": 0.709663402688108, "compression/movement_sparsity/model_sparsity": 0.6852842816257866, "compression_loss": 122.94646453857422, "distillation_loss": 4.748937606811523, "epoch": 1.38, "learning_rate": 4.3089602704987325e-05, "loss": 127.7195, "step": 1635, "task_loss": 2.836721658706665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1455382354882733, "compression/movement_sparsity/importance_threshold": -0.0011130203962830937, "compression/movement_sparsity/linear_layer_sparsity": 0.7108427028672949, "compression/movement_sparsity/model_sparsity": 0.6864230692158667, "compression_loss": 123.10173034667969, "distillation_loss": 3.8511111736297607, "epoch": 1.38, "learning_rate": 4.308537616229924e-05, "loss": 127.113, "step": 1636, "task_loss": 2.2419724464416504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1469889639621966, "compression/movement_sparsity/importance_threshold": -0.0011084650660821917, "compression/movement_sparsity/linear_layer_sparsity": 0.712111326986242, "compression/movement_sparsity/model_sparsity": 0.6876481121935837, "compression_loss": 123.25662994384766, "distillation_loss": 4.497777462005615, "epoch": 1.38, "learning_rate": 4.308114961961116e-05, "loss": 127.6351, "step": 1637, "task_loss": 1.9297791719436646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1484357286995897, "compression/movement_sparsity/importance_threshold": -0.0011039221821309148, "compression/movement_sparsity/linear_layer_sparsity": 0.7133632572704989, "compression/movement_sparsity/model_sparsity": 0.6888570348211883, "compression_loss": 123.41108703613281, "distillation_loss": 3.224041223526001, "epoch": 1.38, "learning_rate": 4.3076923076923084e-05, "loss": 126.7166, "step": 1638, "task_loss": 2.079244613647461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1498785351228002, "compression/movement_sparsity/importance_threshold": -0.0010993917274029312, "compression/movement_sparsity/linear_layer_sparsity": 0.7146344450854877, "compression/movement_sparsity/model_sparsity": 0.6900845534241011, "compression_loss": 123.5650863647461, "distillation_loss": 5.317746162414551, "epoch": 1.39, "learning_rate": 4.3072696534235e-05, "loss": 127.8603, "step": 1639, "task_loss": 2.426868200302124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1513173886541763, "compression/movement_sparsity/importance_threshold": -0.001094873684871908, "compression/movement_sparsity/linear_layer_sparsity": 0.7158877228006875, "compression/movement_sparsity/model_sparsity": 0.6912947771942505, "compression_loss": 123.71871185302734, "distillation_loss": 5.781313419342041, "epoch": 1.39, "learning_rate": 4.306846999154692e-05, "loss": 128.8065, "step": 1640, "task_loss": 2.936715602874756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1527522947160667, "compression/movement_sparsity/importance_threshold": -0.00109036803751151, "compression/movement_sparsity/linear_layer_sparsity": 0.7172322442466368, "compression/movement_sparsity/model_sparsity": 0.6925931101922997, "compression_loss": 123.87187194824219, "distillation_loss": 6.47765588760376, "epoch": 1.39, "learning_rate": 4.3064243448858837e-05, "loss": 129.0338, "step": 1641, "task_loss": 3.2811150550842285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1541832587308194, "compression/movement_sparsity/importance_threshold": -0.0010858747682954049, "compression/movement_sparsity/linear_layer_sparsity": 0.7184827794032803, "compression/movement_sparsity/model_sparsity": 0.6938006856192165, "compression_loss": 124.0246810913086, "distillation_loss": 4.720431804656982, "epoch": 1.39, "learning_rate": 4.306001690617075e-05, "loss": 128.9026, "step": 1642, "task_loss": 2.5120980739593506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.155610286120782, "compression/movement_sparsity/importance_threshold": -0.0010813938601972606, "compression/movement_sparsity/linear_layer_sparsity": 0.7197101220538721, "compression/movement_sparsity/model_sparsity": 0.6949858652740127, "compression_loss": 124.17704010009766, "distillation_loss": 5.344167709350586, "epoch": 1.39, "learning_rate": 4.3055790363482676e-05, "loss": 129.3675, "step": 1643, "task_loss": 2.4594790935516357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1570333823083034, "compression/movement_sparsity/importance_threshold": -0.0010769252961907423, "compression/movement_sparsity/linear_layer_sparsity": 0.720708234505832, "compression/movement_sparsity/model_sparsity": 0.6959496894926948, "compression_loss": 124.32897186279297, "distillation_loss": 3.9895737171173096, "epoch": 1.39, "learning_rate": 4.3051563820794596e-05, "loss": 128.8685, "step": 1644, "task_loss": 1.6441888809204102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1584525527157319, "compression/movement_sparsity/importance_threshold": -0.0010724690592495167, "compression/movement_sparsity/linear_layer_sparsity": 0.7219841442912046, "compression/movement_sparsity/model_sparsity": 0.6971817678517822, "compression_loss": 124.48047637939453, "distillation_loss": 5.422521591186523, "epoch": 1.39, "learning_rate": 4.304733727810651e-05, "loss": 129.1549, "step": 1645, "task_loss": 2.982480049133301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1598678027654148, "compression/movement_sparsity/importance_threshold": -0.0010680251323472519, "compression/movement_sparsity/linear_layer_sparsity": 0.7231102269102325, "compression/movement_sparsity/model_sparsity": 0.6982691660686112, "compression_loss": 124.63160705566406, "distillation_loss": 3.476102590560913, "epoch": 1.39, "learning_rate": 4.304311073541843e-05, "loss": 129.4896, "step": 1646, "task_loss": 1.5611273050308228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1612791378797014, "compression/movement_sparsity/importance_threshold": -0.0010635934984576131, "compression/movement_sparsity/linear_layer_sparsity": 0.7242255539300531, "compression/movement_sparsity/model_sparsity": 0.6993461781741537, "compression_loss": 124.78230285644531, "distillation_loss": 4.061097621917725, "epoch": 1.39, "learning_rate": 4.303888419273035e-05, "loss": 129.3975, "step": 1647, "task_loss": 3.164276123046875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.162686563480939, "compression/movement_sparsity/importance_threshold": -0.0010591741405542683, "compression/movement_sparsity/linear_layer_sparsity": 0.7253236266793044, "compression/movement_sparsity/model_sparsity": 0.7004065287464013, "compression_loss": 124.93258666992188, "distillation_loss": 5.3908233642578125, "epoch": 1.39, "learning_rate": 4.303465765004227e-05, "loss": 129.998, "step": 1648, "task_loss": 3.5379457473754883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1640900849914761, "compression/movement_sparsity/importance_threshold": -0.0010547670416108834, "compression/movement_sparsity/linear_layer_sparsity": 0.7264349710271344, "compression/movement_sparsity/model_sparsity": 0.7014796949969884, "compression_loss": 125.0823745727539, "distillation_loss": 3.8984055519104004, "epoch": 1.39, "learning_rate": 4.303043110735419e-05, "loss": 129.2775, "step": 1649, "task_loss": 2.8166308403015137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1654897078336612, "compression/movement_sparsity/importance_threshold": -0.0010503721846011251, "compression/movement_sparsity/linear_layer_sparsity": 0.7276892980690861, "compression/movement_sparsity/model_sparsity": 0.7026909320462876, "compression_loss": 125.23187255859375, "distillation_loss": 4.039017677307129, "epoch": 1.39, "learning_rate": 4.302620456466611e-05, "loss": 129.7654, "step": 1650, "task_loss": 1.8861942291259766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1668854374298419, "compression/movement_sparsity/importance_threshold": -0.0010459895524986612, "compression/movement_sparsity/linear_layer_sparsity": 0.7286833563040499, "compression/movement_sparsity/model_sparsity": 0.7036508413227995, "compression_loss": 125.38087463378906, "distillation_loss": 5.553791046142578, "epoch": 1.4, "learning_rate": 4.302197802197803e-05, "loss": 130.7731, "step": 1651, "task_loss": 1.9815449714660645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.168277279202367, "compression/movement_sparsity/importance_threshold": -0.0010416191282771564, "compression/movement_sparsity/linear_layer_sparsity": 0.7298072806487358, "compression/movement_sparsity/model_sparsity": 0.7047361554086498, "compression_loss": 125.52945709228516, "distillation_loss": 4.8394060134887695, "epoch": 1.4, "learning_rate": 4.301775147928994e-05, "loss": 129.5692, "step": 1652, "task_loss": 1.6050645112991333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1696652385735846, "compression/movement_sparsity/importance_threshold": -0.001037260894910279, "compression/movement_sparsity/linear_layer_sparsity": 0.7307350166633089, "compression/movement_sparsity/model_sparsity": 0.7056320208370724, "compression_loss": 125.67767333984375, "distillation_loss": 3.5678977966308594, "epoch": 1.4, "learning_rate": 4.301352493660186e-05, "loss": 130.6722, "step": 1653, "task_loss": 1.6422040462493896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1710493209658424, "compression/movement_sparsity/importance_threshold": -0.0010329148353716957, "compression/movement_sparsity/linear_layer_sparsity": 0.7318193050747731, "compression/movement_sparsity/model_sparsity": 0.7066790606059417, "compression_loss": 125.82548522949219, "distillation_loss": 5.687063217163086, "epoch": 1.4, "learning_rate": 4.300929839391378e-05, "loss": 130.928, "step": 1654, "task_loss": 2.9897544384002686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.172429531801489, "compression/movement_sparsity/importance_threshold": -0.0010285809326350723, "compression/movement_sparsity/linear_layer_sparsity": 0.7328589017059381, "compression/movement_sparsity/model_sparsity": 0.7076829438946529, "compression_loss": 125.97283935546875, "distillation_loss": 6.145558834075928, "epoch": 1.4, "learning_rate": 4.30050718512257e-05, "loss": 130.9872, "step": 1655, "task_loss": 2.6128365993499756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1738058765028727, "compression/movement_sparsity/importance_threshold": -0.0010242591696740765, "compression/movement_sparsity/linear_layer_sparsity": 0.7340143058531827, "compression/movement_sparsity/model_sparsity": 0.7087986563550007, "compression_loss": 126.11978912353516, "distillation_loss": 5.220210075378418, "epoch": 1.4, "learning_rate": 4.300084530853762e-05, "loss": 131.198, "step": 1656, "task_loss": 2.4230151176452637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1751783604923411, "compression/movement_sparsity/importance_threshold": -0.001019949529462375, "compression/movement_sparsity/linear_layer_sparsity": 0.7351830531440119, "compression/movement_sparsity/model_sparsity": 0.7099272535809026, "compression_loss": 126.26640319824219, "distillation_loss": 5.404793739318848, "epoch": 1.4, "learning_rate": 4.299661876584954e-05, "loss": 130.8484, "step": 1657, "task_loss": 3.106280565261841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.176546989192243, "compression/movement_sparsity/importance_threshold": -0.0010156519949736337, "compression/movement_sparsity/linear_layer_sparsity": 0.7362322964267944, "compression/movement_sparsity/model_sparsity": 0.7109404521290716, "compression_loss": 126.41250610351562, "distillation_loss": 4.530086994171143, "epoch": 1.4, "learning_rate": 4.299239222316145e-05, "loss": 131.3589, "step": 1658, "task_loss": 1.633629560470581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1779117680249265, "compression/movement_sparsity/importance_threshold": -0.0010113665491815194, "compression/movement_sparsity/linear_layer_sparsity": 0.7373045652772817, "compression/movement_sparsity/model_sparsity": 0.71197588524586, "compression_loss": 126.55818939208984, "distillation_loss": 5.269854545593262, "epoch": 1.4, "learning_rate": 4.298816568047337e-05, "loss": 131.2301, "step": 1659, "task_loss": 1.6925129890441895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1792727024127396, "compression/movement_sparsity/importance_threshold": -0.0010070931750596995, "compression/movement_sparsity/linear_layer_sparsity": 0.7382474807572552, "compression/movement_sparsity/model_sparsity": 0.7128864086783491, "compression_loss": 126.70355224609375, "distillation_loss": 4.663570404052734, "epoch": 1.4, "learning_rate": 4.29839391377853e-05, "loss": 130.4322, "step": 1660, "task_loss": 1.6720130443572998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1806297977780305, "compression/movement_sparsity/importance_threshold": -0.0010028318555818402, "compression/movement_sparsity/linear_layer_sparsity": 0.7392262879818127, "compression/movement_sparsity/model_sparsity": 0.7138315908635797, "compression_loss": 126.84844207763672, "distillation_loss": 4.284666061401367, "epoch": 1.4, "learning_rate": 4.297971259509721e-05, "loss": 131.4044, "step": 1661, "task_loss": 2.589498281478882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1819830595431478, "compression/movement_sparsity/importance_threshold": -0.0009985825737216084, "compression/movement_sparsity/linear_layer_sparsity": 0.7402984495147913, "compression/movement_sparsity/model_sparsity": 0.7148669203495458, "compression_loss": 126.9928970336914, "distillation_loss": 6.332771301269531, "epoch": 1.4, "learning_rate": 4.297548605240913e-05, "loss": 132.1842, "step": 1662, "task_loss": 3.254516839981079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.183332493130439, "compression/movement_sparsity/importance_threshold": -0.0009943453124526712, "compression/movement_sparsity/linear_layer_sparsity": 0.7413892127492819, "compression/movement_sparsity/model_sparsity": 0.7159202125113514, "compression_loss": 127.13699340820312, "distillation_loss": 4.129354000091553, "epoch": 1.41, "learning_rate": 4.297125950972105e-05, "loss": 132.2666, "step": 1663, "task_loss": 1.7263877391815186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1846781039622531, "compression/movement_sparsity/importance_threshold": -0.000990120054748694, "compression/movement_sparsity/linear_layer_sparsity": 0.7423282528747738, "compression/movement_sparsity/model_sparsity": 0.7168269937197074, "compression_loss": 127.28064727783203, "distillation_loss": 4.1973419189453125, "epoch": 1.41, "learning_rate": 4.2967032967032963e-05, "loss": 131.7189, "step": 1664, "task_loss": 2.724881172180176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1860198974609375, "compression/movement_sparsity/importance_threshold": -0.0009859067835833457, "compression/movement_sparsity/linear_layer_sparsity": 0.7434288416233963, "compression/movement_sparsity/model_sparsity": 0.7178897738590077, "compression_loss": 127.4238510131836, "distillation_loss": 5.392586708068848, "epoch": 1.41, "learning_rate": 4.296280642434489e-05, "loss": 131.892, "step": 1665, "task_loss": 3.000189781188965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1873578790488408, "compression/movement_sparsity/importance_threshold": -0.0009817054819302915, "compression/movement_sparsity/linear_layer_sparsity": 0.7442039840647332, "compression/movement_sparsity/model_sparsity": 0.7186382877728675, "compression_loss": 127.56666564941406, "distillation_loss": 4.866158485412598, "epoch": 1.41, "learning_rate": 4.295857988165681e-05, "loss": 131.7309, "step": 1666, "task_loss": 2.953197479248047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1886920541483113, "compression/movement_sparsity/importance_threshold": -0.000977516132763198, "compression/movement_sparsity/linear_layer_sparsity": 0.7451646546303164, "compression/movement_sparsity/model_sparsity": 0.7195659563491548, "compression_loss": 127.70897674560547, "distillation_loss": 3.580167770385742, "epoch": 1.41, "learning_rate": 4.295435333896873e-05, "loss": 132.094, "step": 1667, "task_loss": 2.222722291946411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1900224281816971, "compression/movement_sparsity/importance_threshold": -0.0009733387190557327, "compression/movement_sparsity/linear_layer_sparsity": 0.7460860469877072, "compression/movement_sparsity/model_sparsity": 0.7204556960445347, "compression_loss": 127.8509292602539, "distillation_loss": 5.3917012214660645, "epoch": 1.41, "learning_rate": 4.295012679628064e-05, "loss": 132.6568, "step": 1668, "task_loss": 2.057342052459717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1913490065713461, "compression/movement_sparsity/importance_threshold": -0.0009691732237815617, "compression/movement_sparsity/linear_layer_sparsity": 0.7473162156660287, "compression/movement_sparsity/model_sparsity": 0.7216436046443143, "compression_loss": 127.99237060546875, "distillation_loss": 4.974492073059082, "epoch": 1.41, "learning_rate": 4.294590025359256e-05, "loss": 132.8543, "step": 1669, "task_loss": 2.2963058948516846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.192671794739607, "compression/movement_sparsity/importance_threshold": -0.0009650196299143517, "compression/movement_sparsity/linear_layer_sparsity": 0.7484738138601182, "compression/movement_sparsity/model_sparsity": 0.7227614357792483, "compression_loss": 128.1334228515625, "distillation_loss": 6.833734512329102, "epoch": 1.41, "learning_rate": 4.294167371090448e-05, "loss": 133.6974, "step": 1670, "task_loss": 3.656033515930176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1939907981088276, "compression/movement_sparsity/importance_threshold": -0.0009608779204277706, "compression/movement_sparsity/linear_layer_sparsity": 0.7496242217811234, "compression/movement_sparsity/model_sparsity": 0.7238723236490983, "compression_loss": 128.27403259277344, "distillation_loss": 5.081347465515137, "epoch": 1.41, "learning_rate": 4.29374471682164e-05, "loss": 132.7393, "step": 1671, "task_loss": 2.9365384578704834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1953060221013563, "compression/movement_sparsity/importance_threshold": -0.0009567480782954833, "compression/movement_sparsity/linear_layer_sparsity": 0.7507504951868336, "compression/movement_sparsity/model_sparsity": 0.7249599060985, "compression_loss": 128.41429138183594, "distillation_loss": 4.861852645874023, "epoch": 1.41, "learning_rate": 4.293322062552832e-05, "loss": 133.4224, "step": 1672, "task_loss": 1.4553730487823486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1966174721395413, "compression/movement_sparsity/importance_threshold": -0.0009526300864911574, "compression/movement_sparsity/linear_layer_sparsity": 0.7518278675810691, "compression/movement_sparsity/model_sparsity": 0.7260002674366084, "compression_loss": 128.55416870117188, "distillation_loss": 5.963460922241211, "epoch": 1.41, "learning_rate": 4.292899408284024e-05, "loss": 133.7726, "step": 1673, "task_loss": 3.1765036582946777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1979251536457305, "compression/movement_sparsity/importance_threshold": -0.000948523927988461, "compression/movement_sparsity/linear_layer_sparsity": 0.7528275540231569, "compression/movement_sparsity/model_sparsity": 0.7269656115740153, "compression_loss": 128.69354248046875, "distillation_loss": 5.412083148956299, "epoch": 1.41, "learning_rate": 4.2924767540152154e-05, "loss": 133.2293, "step": 1674, "task_loss": 2.20062518119812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.1992290720422725, "compression/movement_sparsity/importance_threshold": -0.0009444295857610582, "compression/movement_sparsity/linear_layer_sparsity": 0.7538527939564884, "compression/movement_sparsity/model_sparsity": 0.7279556313616299, "compression_loss": 128.83245849609375, "distillation_loss": 5.566836833953857, "epoch": 1.42, "learning_rate": 4.2920540997464074e-05, "loss": 133.8009, "step": 1675, "task_loss": 3.1525473594665527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2005292327515154, "compression/movement_sparsity/importance_threshold": -0.0009403470427826171, "compression/movement_sparsity/linear_layer_sparsity": 0.7547149708973995, "compression/movement_sparsity/model_sparsity": 0.7287881898722541, "compression_loss": 128.9710235595703, "distillation_loss": 4.037566184997559, "epoch": 1.42, "learning_rate": 4.2916314454775994e-05, "loss": 134.0059, "step": 1676, "task_loss": 2.0260658264160156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2018256411958068, "compression/movement_sparsity/importance_threshold": -0.0009362762820268053, "compression/movement_sparsity/linear_layer_sparsity": 0.7557893264772231, "compression/movement_sparsity/model_sparsity": 0.7298256380328065, "compression_loss": 129.10919189453125, "distillation_loss": 6.758441925048828, "epoch": 1.42, "learning_rate": 4.291208791208791e-05, "loss": 134.5959, "step": 1677, "task_loss": 2.210911989212036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2031183027974959, "compression/movement_sparsity/importance_threshold": -0.0009322172864672873, "compression/movement_sparsity/linear_layer_sparsity": 0.7571184418985869, "compression/movement_sparsity/model_sparsity": 0.7311090942506092, "compression_loss": 129.2469024658203, "distillation_loss": 7.2887115478515625, "epoch": 1.42, "learning_rate": 4.290786136939983e-05, "loss": 134.6074, "step": 1678, "task_loss": 3.240959405899048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2044072229789304, "compression/movement_sparsity/importance_threshold": -0.0009281700390777318, "compression/movement_sparsity/linear_layer_sparsity": 0.758223072940038, "compression/movement_sparsity/model_sparsity": 0.7321757778175438, "compression_loss": 129.38433837890625, "distillation_loss": 4.416586875915527, "epoch": 1.42, "learning_rate": 4.290363482671175e-05, "loss": 134.093, "step": 1679, "task_loss": 2.084963798522949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2056924071624582, "compression/movement_sparsity/importance_threshold": -0.0009241345228318048, "compression/movement_sparsity/linear_layer_sparsity": 0.7592856235939022, "compression/movement_sparsity/model_sparsity": 0.7332018265876596, "compression_loss": 129.5212860107422, "distillation_loss": 4.9988274574279785, "epoch": 1.42, "learning_rate": 4.289940828402367e-05, "loss": 134.1176, "step": 1680, "task_loss": 2.9756672382354736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.206973860770428, "compression/movement_sparsity/importance_threshold": -0.0009201107207031734, "compression/movement_sparsity/linear_layer_sparsity": 0.7602929534274446, "compression/movement_sparsity/model_sparsity": 0.7341745515425108, "compression_loss": 129.65785217285156, "distillation_loss": 5.318084716796875, "epoch": 1.42, "learning_rate": 4.2895181741335585e-05, "loss": 134.7069, "step": 1681, "task_loss": 2.87835693359375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2082515892251877, "compression/movement_sparsity/importance_threshold": -0.0009160986156655031, "compression/movement_sparsity/linear_layer_sparsity": 0.761374880853717, "compression/movement_sparsity/model_sparsity": 0.7352193114332927, "compression_loss": 129.79391479492188, "distillation_loss": 7.476860046386719, "epoch": 1.42, "learning_rate": 4.289095519864751e-05, "loss": 136.285, "step": 1682, "task_loss": 3.2668232917785645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2095255979490855, "compression/movement_sparsity/importance_threshold": -0.0009120981906924619, "compression/movement_sparsity/linear_layer_sparsity": 0.7622706362506907, "compression/movement_sparsity/model_sparsity": 0.7360842948767143, "compression_loss": 129.92970275878906, "distillation_loss": 5.667370796203613, "epoch": 1.42, "learning_rate": 4.288672865595943e-05, "loss": 135.707, "step": 1683, "task_loss": 2.8117642402648926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2107958923644702, "compression/movement_sparsity/importance_threshold": -0.0009081094287577154, "compression/movement_sparsity/linear_layer_sparsity": 0.763171447494742, "compression/movement_sparsity/model_sparsity": 0.736954160483313, "compression_loss": 130.0651092529297, "distillation_loss": 6.132251739501953, "epoch": 1.42, "learning_rate": 4.2882502113271345e-05, "loss": 135.2298, "step": 1684, "task_loss": 3.029622793197632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2120624778936888, "compression/movement_sparsity/importance_threshold": -0.0009041323128349322, "compression/movement_sparsity/linear_layer_sparsity": 0.7641220659870082, "compression/movement_sparsity/model_sparsity": 0.7378721223059254, "compression_loss": 130.20005798339844, "distillation_loss": 6.328795909881592, "epoch": 1.42, "learning_rate": 4.2878275570583264e-05, "loss": 136.1438, "step": 1685, "task_loss": 3.421241521835327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2133253599590907, "compression/movement_sparsity/importance_threshold": -0.0009001668258977768, "compression/movement_sparsity/linear_layer_sparsity": 0.7651221697749633, "compression/movement_sparsity/model_sparsity": 0.7388378694520851, "compression_loss": 130.33468627929688, "distillation_loss": 6.867501258850098, "epoch": 1.42, "learning_rate": 4.2874049027895184e-05, "loss": 136.5501, "step": 1686, "task_loss": 5.162953853607178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2145845439830232, "compression/movement_sparsity/importance_threshold": -0.0008962129509199174, "compression/movement_sparsity/linear_layer_sparsity": 0.7660799308436435, "compression/movement_sparsity/model_sparsity": 0.7397627284816384, "compression_loss": 130.46890258789062, "distillation_loss": 4.743262767791748, "epoch": 1.43, "learning_rate": 4.2869822485207104e-05, "loss": 135.8861, "step": 1687, "task_loss": 1.9818406105041504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.215840035387835, "compression/movement_sparsity/importance_threshold": -0.0008922706708750206, "compression/movement_sparsity/linear_layer_sparsity": 0.7670572117747435, "compression/movement_sparsity/model_sparsity": 0.7407064368062873, "compression_loss": 130.6027069091797, "distillation_loss": 4.071943759918213, "epoch": 1.43, "learning_rate": 4.2865595942519024e-05, "loss": 135.8047, "step": 1688, "task_loss": 1.5186123847961426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.217091839595874, "compression/movement_sparsity/importance_threshold": -0.000888339968736752, "compression/movement_sparsity/linear_layer_sparsity": 0.7681103065636724, "compression/movement_sparsity/model_sparsity": 0.741723354549518, "compression_loss": 130.73617553710938, "distillation_loss": 4.814032554626465, "epoch": 1.43, "learning_rate": 4.286136939983094e-05, "loss": 135.563, "step": 1689, "task_loss": 1.6335136890411377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2183399620294888, "compression/movement_sparsity/importance_threshold": -0.0008844208274787799, "compression/movement_sparsity/linear_layer_sparsity": 0.769005704235617, "compression/movement_sparsity/model_sparsity": 0.7425879925568659, "compression_loss": 130.8692626953125, "distillation_loss": 3.8926427364349365, "epoch": 1.43, "learning_rate": 4.2857142857142856e-05, "loss": 136.1487, "step": 1690, "task_loss": 2.4474847316741943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.219584408111027, "compression/movement_sparsity/importance_threshold": -0.0008805132300747703, "compression/movement_sparsity/linear_layer_sparsity": 0.7698828221585757, "compression/movement_sparsity/model_sparsity": 0.7434349787808407, "compression_loss": 131.0019073486328, "distillation_loss": 5.876618385314941, "epoch": 1.43, "learning_rate": 4.2852916314454776e-05, "loss": 136.3073, "step": 1691, "task_loss": 3.0356574058532715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2208251832628374, "compression/movement_sparsity/importance_threshold": -0.0008766171594983894, "compression/movement_sparsity/linear_layer_sparsity": 0.7708376141095146, "compression/movement_sparsity/model_sparsity": 0.7443569706909812, "compression_loss": 131.13417053222656, "distillation_loss": 7.163200855255127, "epoch": 1.43, "learning_rate": 4.2848689771766696e-05, "loss": 136.3789, "step": 1692, "task_loss": 4.30001163482666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2220622929072678, "compression/movement_sparsity/importance_threshold": -0.0008727325987233046, "compression/movement_sparsity/linear_layer_sparsity": 0.7717705967578474, "compression/movement_sparsity/model_sparsity": 0.7452579025151534, "compression_loss": 131.26600646972656, "distillation_loss": 8.87173843383789, "epoch": 1.43, "learning_rate": 4.2844463229078616e-05, "loss": 136.8873, "step": 1693, "task_loss": 4.384936332702637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2232957424666666, "compression/movement_sparsity/importance_threshold": -0.000868859530723182, "compression/movement_sparsity/linear_layer_sparsity": 0.7724888370712261, "compression/movement_sparsity/model_sparsity": 0.7459514690642018, "compression_loss": 131.3975067138672, "distillation_loss": 6.06578254699707, "epoch": 1.43, "learning_rate": 4.2840236686390535e-05, "loss": 136.4462, "step": 1694, "task_loss": 1.5085026025772095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2245255373633819, "compression/movement_sparsity/importance_threshold": -0.0008649979384716894, "compression/movement_sparsity/linear_layer_sparsity": 0.7732114535541269, "compression/movement_sparsity/model_sparsity": 0.7466492614478868, "compression_loss": 131.52853393554688, "distillation_loss": 4.902538299560547, "epoch": 1.43, "learning_rate": 4.2836010143702455e-05, "loss": 136.909, "step": 1695, "task_loss": 2.141193389892578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.225751683019762, "compression/movement_sparsity/importance_threshold": -0.0008611478049424925, "compression/movement_sparsity/linear_layer_sparsity": 0.7741124317365251, "compression/movement_sparsity/model_sparsity": 0.7475192882579865, "compression_loss": 131.65914916992188, "distillation_loss": 4.675021171569824, "epoch": 1.43, "learning_rate": 4.2831783601014375e-05, "loss": 136.394, "step": 1696, "task_loss": 2.1710503101348877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2269741848581548, "compression/movement_sparsity/importance_threshold": -0.0008573091131092586, "compression/movement_sparsity/linear_layer_sparsity": 0.7751762463521586, "compression/movement_sparsity/model_sparsity": 0.7485465575688964, "compression_loss": 131.78932189941406, "distillation_loss": 6.732913017272949, "epoch": 1.43, "learning_rate": 4.282755705832629e-05, "loss": 137.5181, "step": 1697, "task_loss": 4.0819196701049805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.228193048300909, "compression/movement_sparsity/importance_threshold": -0.0008534818459456535, "compression/movement_sparsity/linear_layer_sparsity": 0.7759912751342375, "compression/movement_sparsity/model_sparsity": 0.7493335876049891, "compression_loss": 131.91915893554688, "distillation_loss": 6.119390487670898, "epoch": 1.44, "learning_rate": 4.282333051563821e-05, "loss": 137.3243, "step": 1698, "task_loss": 3.9338090419769287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2294082787703722, "compression/movement_sparsity/importance_threshold": -0.0008496659864253454, "compression/movement_sparsity/linear_layer_sparsity": 0.7769348345192634, "compression/movement_sparsity/model_sparsity": 0.7502447328224111, "compression_loss": 132.04861450195312, "distillation_loss": 5.362759113311768, "epoch": 1.44, "learning_rate": 4.2819103972950134e-05, "loss": 136.8099, "step": 1699, "task_loss": 3.3708407878875732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.230619881688893, "compression/movement_sparsity/importance_threshold": -0.0008458615175220003, "compression/movement_sparsity/linear_layer_sparsity": 0.7778929294646373, "compression/movement_sparsity/model_sparsity": 0.7511699142589667, "compression_loss": 132.1775665283203, "distillation_loss": 6.90968132019043, "epoch": 1.44, "learning_rate": 4.281487743026205e-05, "loss": 138.3985, "step": 1700, "task_loss": 2.7389421463012695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2318278624788197, "compression/movement_sparsity/importance_threshold": -0.0008420684222092841, "compression/movement_sparsity/linear_layer_sparsity": 0.7787619031811008, "compression/movement_sparsity/model_sparsity": 0.7520090360549938, "compression_loss": 132.30618286132812, "distillation_loss": 4.572964191436768, "epoch": 1.44, "learning_rate": 4.281065088757397e-05, "loss": 137.3438, "step": 1701, "task_loss": 2.8707261085510254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2330322265625, "compression/movement_sparsity/importance_threshold": -0.0008382866834608649, "compression/movement_sparsity/linear_layer_sparsity": 0.7796568000380047, "compression/movement_sparsity/model_sparsity": 0.7528731904518383, "compression_loss": 132.43434143066406, "distillation_loss": 4.620830535888672, "epoch": 1.44, "learning_rate": 4.2806424344885886e-05, "loss": 137.3701, "step": 1702, "task_loss": 1.9425766468048096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2342329793622824, "compression/movement_sparsity/importance_threshold": -0.0008345162842504088, "compression/movement_sparsity/linear_layer_sparsity": 0.7804211749559664, "compression/movement_sparsity/model_sparsity": 0.7536113067398758, "compression_loss": 132.56214904785156, "distillation_loss": 5.746803283691406, "epoch": 1.44, "learning_rate": 4.28021978021978e-05, "loss": 137.6664, "step": 1703, "task_loss": 2.6319899559020996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2354301263005154, "compression/movement_sparsity/importance_threshold": -0.0008307572075515819, "compression/movement_sparsity/linear_layer_sparsity": 0.7814443877807998, "compression/movement_sparsity/model_sparsity": 0.7545993690564053, "compression_loss": 132.68948364257812, "distillation_loss": 4.8597211837768555, "epoch": 1.44, "learning_rate": 4.2797971259509726e-05, "loss": 137.4753, "step": 1704, "task_loss": 2.681061267852783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2366236727995465, "compression/movement_sparsity/importance_threshold": -0.0008270094363380526, "compression/movement_sparsity/linear_layer_sparsity": 0.7823389865335129, "compression/movement_sparsity/model_sparsity": 0.755463235589855, "compression_loss": 132.8164520263672, "distillation_loss": 5.233499050140381, "epoch": 1.44, "learning_rate": 4.2793744716821646e-05, "loss": 137.7237, "step": 1705, "task_loss": 3.111027717590332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2378136242817244, "compression/movement_sparsity/importance_threshold": -0.0008232729535834859, "compression/movement_sparsity/linear_layer_sparsity": 0.7831724500787567, "compression/movement_sparsity/model_sparsity": 0.756268067098286, "compression_loss": 132.94300842285156, "distillation_loss": 5.164727210998535, "epoch": 1.44, "learning_rate": 4.278951817413356e-05, "loss": 138.0709, "step": 1706, "task_loss": 2.71216082572937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2389999861693972, "compression/movement_sparsity/importance_threshold": -0.000819547742261549, "compression/movement_sparsity/linear_layer_sparsity": 0.7840868071772423, "compression/movement_sparsity/model_sparsity": 0.7571510132175472, "compression_loss": 133.06924438476562, "distillation_loss": 6.585019111633301, "epoch": 1.44, "learning_rate": 4.278529163144548e-05, "loss": 138.0081, "step": 1707, "task_loss": 3.1112542152404785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.240182763884913, "compression/movement_sparsity/importance_threshold": -0.0008158337853459096, "compression/movement_sparsity/linear_layer_sparsity": 0.7851427756905714, "compression/movement_sparsity/model_sparsity": 0.7581707059639043, "compression_loss": 133.19509887695312, "distillation_loss": 6.9932451248168945, "epoch": 1.44, "learning_rate": 4.27810650887574e-05, "loss": 138.4422, "step": 1708, "task_loss": 2.9983417987823486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.24136196285062, "compression/movement_sparsity/importance_threshold": -0.0008121310658102329, "compression/movement_sparsity/linear_layer_sparsity": 0.7861185184040466, "compression/movement_sparsity/model_sparsity": 0.7591129289134357, "compression_loss": 133.32052612304688, "distillation_loss": 5.459270477294922, "epoch": 1.44, "learning_rate": 4.277683854606932e-05, "loss": 138.876, "step": 1709, "task_loss": 2.3245437145233154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2425375884888668, "compression/movement_sparsity/importance_threshold": -0.0008084395666281864, "compression/movement_sparsity/linear_layer_sparsity": 0.7869793836865177, "compression/movement_sparsity/model_sparsity": 0.7599442208251227, "compression_loss": 133.4455108642578, "distillation_loss": 5.4145917892456055, "epoch": 1.45, "learning_rate": 4.277261200338124e-05, "loss": 139.108, "step": 1710, "task_loss": 2.6897339820861816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2437096462220008, "compression/movement_sparsity/importance_threshold": -0.0008047592707734375, "compression/movement_sparsity/linear_layer_sparsity": 0.7879688153444386, "compression/movement_sparsity/model_sparsity": 0.7608996624617462, "compression_loss": 133.57017517089844, "distillation_loss": 6.585570335388184, "epoch": 1.45, "learning_rate": 4.276838546069316e-05, "loss": 138.994, "step": 1711, "task_loss": 3.6023776531219482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2448781414723709, "compression/movement_sparsity/importance_threshold": -0.0008010901612196513, "compression/movement_sparsity/linear_layer_sparsity": 0.788922999162828, "compression/movement_sparsity/model_sparsity": 0.7618210671305612, "compression_loss": 133.69448852539062, "distillation_loss": 6.245987892150879, "epoch": 1.45, "learning_rate": 4.276415891800508e-05, "loss": 138.9138, "step": 1712, "task_loss": 3.380957841873169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.246043079662325, "compression/movement_sparsity/importance_threshold": -0.0007974322209404958, "compression/movement_sparsity/linear_layer_sparsity": 0.7897434176686783, "compression/movement_sparsity/model_sparsity": 0.762613301736833, "compression_loss": 133.81842041015625, "distillation_loss": 7.845611572265625, "epoch": 1.45, "learning_rate": 4.275993237531699e-05, "loss": 139.9407, "step": 1713, "task_loss": 4.209351539611816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.247204466214211, "compression/movement_sparsity/importance_threshold": -0.0007937854329096378, "compression/movement_sparsity/linear_layer_sparsity": 0.7904920288370254, "compression/movement_sparsity/model_sparsity": 0.76333619580855, "compression_loss": 133.9419708251953, "distillation_loss": 5.992743492126465, "epoch": 1.45, "learning_rate": 4.275570583262891e-05, "loss": 139.5533, "step": 1714, "task_loss": 3.66042423248291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2483623065503777, "compression/movement_sparsity/importance_threshold": -0.0007901497801007431, "compression/movement_sparsity/linear_layer_sparsity": 0.7913430328328617, "compression/movement_sparsity/model_sparsity": 0.7641579651991348, "compression_loss": 134.06515502929688, "distillation_loss": 5.880227088928223, "epoch": 1.45, "learning_rate": 4.275147928994083e-05, "loss": 139.4521, "step": 1715, "task_loss": 2.656493902206421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2495166060931733, "compression/movement_sparsity/importance_threshold": -0.000786525245487479, "compression/movement_sparsity/linear_layer_sparsity": 0.7920941957730829, "compression/movement_sparsity/model_sparsity": 0.764883323381512, "compression_loss": 134.18798828125, "distillation_loss": 6.351490020751953, "epoch": 1.45, "learning_rate": 4.274725274725275e-05, "loss": 140.4867, "step": 1716, "task_loss": 2.8093924522399902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2506673702649453, "compression/movement_sparsity/importance_threshold": -0.0007829118120435121, "compression/movement_sparsity/linear_layer_sparsity": 0.7927116172491002, "compression/movement_sparsity/model_sparsity": 0.7654795345304175, "compression_loss": 134.3104705810547, "distillation_loss": 6.49608039855957, "epoch": 1.45, "learning_rate": 4.274302620456467e-05, "loss": 140.3823, "step": 1717, "task_loss": 3.8049371242523193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2518146044880425, "compression/movement_sparsity/importance_threshold": -0.000779309462742509, "compression/movement_sparsity/linear_layer_sparsity": 0.7936218605097515, "compression/movement_sparsity/model_sparsity": 0.7663585081348295, "compression_loss": 134.43263244628906, "distillation_loss": 4.627270698547363, "epoch": 1.45, "learning_rate": 4.273879966187659e-05, "loss": 139.556, "step": 1718, "task_loss": 2.935063600540161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2529583141848128, "compression/movement_sparsity/importance_threshold": -0.000775718180558136, "compression/movement_sparsity/linear_layer_sparsity": 0.794475869395832, "compression/movement_sparsity/model_sparsity": 0.7671831791884346, "compression_loss": 134.55433654785156, "distillation_loss": 6.83725643157959, "epoch": 1.45, "learning_rate": 4.27345731191885e-05, "loss": 141.2668, "step": 1719, "task_loss": 3.363121509552002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2540985047776043, "compression/movement_sparsity/importance_threshold": -0.0007721379484640616, "compression/movement_sparsity/linear_layer_sparsity": 0.7953367704508061, "compression/movement_sparsity/model_sparsity": 0.7680145056437289, "compression_loss": 134.67575073242188, "distillation_loss": 6.054067611694336, "epoch": 1.45, "learning_rate": 4.273034657650042e-05, "loss": 141.2501, "step": 1720, "task_loss": 3.6818389892578125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.255235181688766, "compression/movement_sparsity/importance_threshold": -0.0007685687494339505, "compression/movement_sparsity/linear_layer_sparsity": 0.7962307372226343, "compression/movement_sparsity/model_sparsity": 0.7688777619067814, "compression_loss": 134.79669189453125, "distillation_loss": 5.380577564239502, "epoch": 1.45, "learning_rate": 4.272612003381235e-05, "loss": 140.2347, "step": 1721, "task_loss": 3.1315689086914062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2563683503406449, "compression/movement_sparsity/importance_threshold": -0.000765010566441471, "compression/movement_sparsity/linear_layer_sparsity": 0.7969657548398765, "compression/movement_sparsity/model_sparsity": 0.7695875294076926, "compression_loss": 134.91725158691406, "distillation_loss": 4.683897018432617, "epoch": 1.46, "learning_rate": 4.272189349112426e-05, "loss": 139.8576, "step": 1722, "task_loss": 2.7050366401672363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.25749801615559, "compression/movement_sparsity/importance_threshold": -0.0007614633824602887, "compression/movement_sparsity/linear_layer_sparsity": 0.7977235356931357, "compression/movement_sparsity/model_sparsity": 0.7703192781574357, "compression_loss": 135.03762817382812, "distillation_loss": 6.378039360046387, "epoch": 1.46, "learning_rate": 4.271766694843618e-05, "loss": 140.0228, "step": 1723, "task_loss": 2.7763330936431885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2586241845559494, "compression/movement_sparsity/importance_threshold": -0.0007579271804640702, "compression/movement_sparsity/linear_layer_sparsity": 0.79850086025715, "compression/movement_sparsity/model_sparsity": 0.7710698992313461, "compression_loss": 135.15745544433594, "distillation_loss": 7.575067520141602, "epoch": 1.46, "learning_rate": 4.27134404057481e-05, "loss": 141.3022, "step": 1724, "task_loss": 3.397301197052002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.259746860964071, "compression/movement_sparsity/importance_threshold": -0.0007544019434264828, "compression/movement_sparsity/linear_layer_sparsity": 0.7992693609371136, "compression/movement_sparsity/model_sparsity": 0.7718119995487683, "compression_loss": 135.27696228027344, "distillation_loss": 5.840609550476074, "epoch": 1.46, "learning_rate": 4.270921386306002e-05, "loss": 140.381, "step": 1725, "task_loss": 2.734457015991211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.260866050802303, "compression/movement_sparsity/importance_threshold": -0.000750887654321194, "compression/movement_sparsity/linear_layer_sparsity": 0.8001048635150233, "compression/movement_sparsity/model_sparsity": 0.7726188000428202, "compression_loss": 135.3960418701172, "distillation_loss": 4.727226257324219, "epoch": 1.46, "learning_rate": 4.270498732037194e-05, "loss": 140.6312, "step": 1726, "task_loss": 2.4743335247039795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2619817594929943, "compression/movement_sparsity/importance_threshold": -0.0007473842961218686, "compression/movement_sparsity/linear_layer_sparsity": 0.8009066445468588, "compression/movement_sparsity/model_sparsity": 0.7733930374296452, "compression_loss": 135.51470947265625, "distillation_loss": 7.124170303344727, "epoch": 1.46, "learning_rate": 4.270076077768386e-05, "loss": 141.5347, "step": 1727, "task_loss": 3.0760762691497803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.263093992458492, "compression/movement_sparsity/importance_threshold": -0.0007438918518021748, "compression/movement_sparsity/linear_layer_sparsity": 0.8015949075028007, "compression/movement_sparsity/model_sparsity": 0.7740576564357061, "compression_loss": 135.63304138183594, "distillation_loss": 7.86110782623291, "epoch": 1.46, "learning_rate": 4.269653423499578e-05, "loss": 141.6356, "step": 1728, "task_loss": 3.749699592590332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2642027551211452, "compression/movement_sparsity/importance_threshold": -0.0007404103043357783, "compression/movement_sparsity/linear_layer_sparsity": 0.8023719101142889, "compression/movement_sparsity/model_sparsity": 0.7748079666171499, "compression_loss": 135.7509765625, "distillation_loss": 6.262055397033691, "epoch": 1.46, "learning_rate": 4.269230769230769e-05, "loss": 141.7056, "step": 1729, "task_loss": 2.6465249061584473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2653080529033018, "compression/movement_sparsity/importance_threshold": -0.0007369396366963474, "compression/movement_sparsity/linear_layer_sparsity": 0.8030577882367037, "compression/movement_sparsity/model_sparsity": 0.7754702827160519, "compression_loss": 135.8685302734375, "distillation_loss": 6.209960460662842, "epoch": 1.46, "learning_rate": 4.268808114961961e-05, "loss": 140.8428, "step": 1730, "task_loss": 3.453437566757202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2664098912273096, "compression/movement_sparsity/importance_threshold": -0.0007334798318575477, "compression/movement_sparsity/linear_layer_sparsity": 0.8039512422693237, "compression/movement_sparsity/model_sparsity": 0.7763330438540653, "compression_loss": 135.98565673828125, "distillation_loss": 7.214276313781738, "epoch": 1.46, "learning_rate": 4.268385460693153e-05, "loss": 141.6959, "step": 1731, "task_loss": 3.2046098709106445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2675082755155174, "compression/movement_sparsity/importance_threshold": -0.0007300308727930457, "compression/movement_sparsity/linear_layer_sparsity": 0.8047782667640442, "compression/movement_sparsity/model_sparsity": 0.7771316575131673, "compression_loss": 136.10238647460938, "distillation_loss": 5.1634297370910645, "epoch": 1.46, "learning_rate": 4.267962806424345e-05, "loss": 141.5635, "step": 1732, "task_loss": 3.3998639583587646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.268603211190273, "compression/movement_sparsity/importance_threshold": -0.0007265927424765081, "compression/movement_sparsity/linear_layer_sparsity": 0.8054772137741879, "compression/movement_sparsity/model_sparsity": 0.7778065935433001, "compression_loss": 136.2188262939453, "distillation_loss": 5.254622936248779, "epoch": 1.46, "learning_rate": 4.267540152155537e-05, "loss": 141.6908, "step": 1733, "task_loss": 2.616302013397217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2696947036739248, "compression/movement_sparsity/importance_threshold": -0.000723165423881603, "compression/movement_sparsity/linear_layer_sparsity": 0.8064489141948344, "compression/movement_sparsity/model_sparsity": 0.7787449130651972, "compression_loss": 136.33480834960938, "distillation_loss": 7.393130302429199, "epoch": 1.47, "learning_rate": 4.267117497886729e-05, "loss": 143.2797, "step": 1734, "task_loss": 3.7298991680145264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2707827583888207, "compression/movement_sparsity/importance_threshold": -0.0007197488999819953, "compression/movement_sparsity/linear_layer_sparsity": 0.8070632234630988, "compression/movement_sparsity/model_sparsity": 0.7793381189202603, "compression_loss": 136.450439453125, "distillation_loss": 5.786230087280273, "epoch": 1.47, "learning_rate": 4.2666948436179204e-05, "loss": 141.9069, "step": 1735, "task_loss": 1.6120538711547852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2718673807573095, "compression/movement_sparsity/importance_threshold": -0.0007163431537513523, "compression/movement_sparsity/linear_layer_sparsity": 0.8076602903849618, "compression/movement_sparsity/model_sparsity": 0.7799146747565646, "compression_loss": 136.56582641601562, "distillation_loss": 4.251001358032227, "epoch": 1.47, "learning_rate": 4.2662721893491124e-05, "loss": 141.724, "step": 1736, "task_loss": 2.6781833171844482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2729485762017387, "compression/movement_sparsity/importance_threshold": -0.0007129481681633423, "compression/movement_sparsity/linear_layer_sparsity": 0.8082966712875201, "compression/movement_sparsity/model_sparsity": 0.7805291940173834, "compression_loss": 136.68081665039062, "distillation_loss": 5.164517402648926, "epoch": 1.47, "learning_rate": 4.265849535080304e-05, "loss": 141.5867, "step": 1737, "task_loss": 1.9678385257720947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2740263501444573, "compression/movement_sparsity/importance_threshold": -0.0007095639261916285, "compression/movement_sparsity/linear_layer_sparsity": 0.8089805223014368, "compression/movement_sparsity/model_sparsity": 0.7811895526452003, "compression_loss": 136.7954559326172, "distillation_loss": 4.900043487548828, "epoch": 1.47, "learning_rate": 4.265426880811497e-05, "loss": 142.1821, "step": 1738, "task_loss": 3.5264742374420166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2751007080078125, "compression/movement_sparsity/importance_threshold": -0.0007061904108098815, "compression/movement_sparsity/linear_layer_sparsity": 0.809836343660998, "compression/movement_sparsity/model_sparsity": 0.7820159739082462, "compression_loss": 136.90969848632812, "distillation_loss": 5.815499305725098, "epoch": 1.47, "learning_rate": 4.265004226542688e-05, "loss": 142.3562, "step": 1739, "task_loss": 2.7860569953918457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.276171655214153, "compression/movement_sparsity/importance_threshold": -0.0007028276049917664, "compression/movement_sparsity/linear_layer_sparsity": 0.8106320552915068, "compression/movement_sparsity/model_sparsity": 0.7827843503963516, "compression_loss": 137.02357482910156, "distillation_loss": 5.78668212890625, "epoch": 1.47, "learning_rate": 4.26458157227388e-05, "loss": 143.0579, "step": 1740, "task_loss": 3.000568151473999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2772391971858275, "compression/movement_sparsity/importance_threshold": -0.0006994754917109486, "compression/movement_sparsity/linear_layer_sparsity": 0.811196104193186, "compression/movement_sparsity/model_sparsity": 0.7833290224830408, "compression_loss": 137.13705444335938, "distillation_loss": 5.275325775146484, "epoch": 1.47, "learning_rate": 4.264158918005072e-05, "loss": 143.1385, "step": 1741, "task_loss": 3.2993671894073486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2783033393451833, "compression/movement_sparsity/importance_threshold": -0.0006961340539410974, "compression/movement_sparsity/linear_layer_sparsity": 0.8119959535098644, "compression/movement_sparsity/model_sparsity": 0.784101394515067, "compression_loss": 137.250244140625, "distillation_loss": 5.988145351409912, "epoch": 1.47, "learning_rate": 4.2637362637362635e-05, "loss": 142.5019, "step": 1742, "task_loss": 4.9025983810424805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2793640871145688, "compression/movement_sparsity/importance_threshold": -0.0006928032746558776, "compression/movement_sparsity/linear_layer_sparsity": 0.812696021391766, "compression/movement_sparsity/model_sparsity": 0.7847774129115644, "compression_loss": 137.36300659179688, "distillation_loss": 7.043306350708008, "epoch": 1.47, "learning_rate": 4.2633136094674555e-05, "loss": 143.3293, "step": 1743, "task_loss": 3.0225682258605957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2804214459163328, "compression/movement_sparsity/importance_threshold": -0.0006894831368289564, "compression/movement_sparsity/linear_layer_sparsity": 0.8133931678525966, "compression/movement_sparsity/model_sparsity": 0.7854506102467923, "compression_loss": 137.4755401611328, "distillation_loss": 7.989251136779785, "epoch": 1.47, "learning_rate": 4.262890955198648e-05, "loss": 143.6456, "step": 1744, "task_loss": 4.132726669311523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2814754211728228, "compression/movement_sparsity/importance_threshold": -0.0006861736234340006, "compression/movement_sparsity/linear_layer_sparsity": 0.8140134392046791, "compression/movement_sparsity/model_sparsity": 0.7860495733697528, "compression_loss": 137.5876007080078, "distillation_loss": 7.679581642150879, "epoch": 1.47, "learning_rate": 4.2624683009298394e-05, "loss": 143.6763, "step": 1745, "task_loss": 3.0121266841888428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2825260183063873, "compression/movement_sparsity/importance_threshold": -0.0006828747174446774, "compression/movement_sparsity/linear_layer_sparsity": 0.8147610248946094, "compression/movement_sparsity/model_sparsity": 0.7867714771913915, "compression_loss": 137.69935607910156, "distillation_loss": 6.29749059677124, "epoch": 1.48, "learning_rate": 4.2620456466610314e-05, "loss": 143.7032, "step": 1746, "task_loss": 3.543337821960449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2835732427393747, "compression/movement_sparsity/importance_threshold": -0.0006795864018346526, "compression/movement_sparsity/linear_layer_sparsity": 0.8155494031620222, "compression/movement_sparsity/model_sparsity": 0.7875327722399833, "compression_loss": 137.81072998046875, "distillation_loss": 6.114058494567871, "epoch": 1.48, "learning_rate": 4.2616229923922234e-05, "loss": 143.8729, "step": 1747, "task_loss": 2.6716294288635254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2846170998941326, "compression/movement_sparsity/importance_threshold": -0.0006763086595775935, "compression/movement_sparsity/linear_layer_sparsity": 0.8163544871882927, "compression/movement_sparsity/model_sparsity": 0.7883101991532233, "compression_loss": 137.9217529296875, "distillation_loss": 6.309618949890137, "epoch": 1.48, "learning_rate": 4.261200338123415e-05, "loss": 143.4778, "step": 1748, "task_loss": 3.4192652702331543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2856575951930098, "compression/movement_sparsity/importance_threshold": -0.0006730414736471667, "compression/movement_sparsity/linear_layer_sparsity": 0.8170422374050264, "compression/movement_sparsity/model_sparsity": 0.7889743230342451, "compression_loss": 138.03240966796875, "distillation_loss": 4.761305332183838, "epoch": 1.48, "learning_rate": 4.2607776838546073e-05, "loss": 143.5089, "step": 1749, "task_loss": 2.7217016220092773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.286694734058354, "compression/movement_sparsity/importance_threshold": -0.0006697848270170396, "compression/movement_sparsity/linear_layer_sparsity": 0.8177118390386183, "compression/movement_sparsity/model_sparsity": 0.7896209217917876, "compression_loss": 138.14260864257812, "distillation_loss": 6.801706790924072, "epoch": 1.48, "learning_rate": 4.260355029585799e-05, "loss": 144.1379, "step": 1750, "task_loss": 3.7426958084106445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2877285219125139, "compression/movement_sparsity/importance_threshold": -0.0006665387026608778, "compression/movement_sparsity/linear_layer_sparsity": 0.8184638724430768, "compression/movement_sparsity/model_sparsity": 0.7903471205352776, "compression_loss": 138.25244140625, "distillation_loss": 7.121084213256836, "epoch": 1.48, "learning_rate": 4.2599323753169906e-05, "loss": 143.5769, "step": 1751, "task_loss": 3.386566162109375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2887589641778374, "compression/movement_sparsity/importance_threshold": -0.0006633030835523478, "compression/movement_sparsity/linear_layer_sparsity": 0.8191650135000657, "compression/movement_sparsity/model_sparsity": 0.7910241752399966, "compression_loss": 138.36195373535156, "distillation_loss": 5.25375509262085, "epoch": 1.48, "learning_rate": 4.2595097210481826e-05, "loss": 143.9507, "step": 1752, "task_loss": 2.1418049335479736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2897860662766727, "compression/movement_sparsity/importance_threshold": -0.000660077952665118, "compression/movement_sparsity/linear_layer_sparsity": 0.8199223412349546, "compression/movement_sparsity/model_sparsity": 0.7917554864373795, "compression_loss": 138.47105407714844, "distillation_loss": 4.255082130432129, "epoch": 1.48, "learning_rate": 4.2590870667793746e-05, "loss": 144.2206, "step": 1753, "task_loss": 2.147740602493286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2908098336313678, "compression/movement_sparsity/importance_threshold": -0.000656863292972854, "compression/movement_sparsity/linear_layer_sparsity": 0.8206230768702437, "compression/movement_sparsity/model_sparsity": 0.7924321496478814, "compression_loss": 138.57981872558594, "distillation_loss": 5.833047389984131, "epoch": 1.48, "learning_rate": 4.2586644125105665e-05, "loss": 143.9094, "step": 1754, "task_loss": 3.3217697143554688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2918302716642716, "compression/movement_sparsity/importance_threshold": -0.0006536590874492224, "compression/movement_sparsity/linear_layer_sparsity": 0.8212446956532689, "compression/movement_sparsity/model_sparsity": 0.7930324139133866, "compression_loss": 138.68817138671875, "distillation_loss": 5.923211574554443, "epoch": 1.48, "learning_rate": 4.2582417582417585e-05, "loss": 144.1391, "step": 1755, "task_loss": 3.254855155944824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2928473857977316, "compression/movement_sparsity/importance_threshold": -0.0006504653190678895, "compression/movement_sparsity/linear_layer_sparsity": 0.8219512622065319, "compression/movement_sparsity/model_sparsity": 0.7937147077318922, "compression_loss": 138.7961883544922, "distillation_loss": 4.856620788574219, "epoch": 1.48, "learning_rate": 4.2578191039729505e-05, "loss": 144.2538, "step": 1756, "task_loss": 3.082163095474243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2938611814540961, "compression/movement_sparsity/importance_threshold": -0.0006472819708025239, "compression/movement_sparsity/linear_layer_sparsity": 0.8227291352822574, "compression/movement_sparsity/model_sparsity": 0.7944658584744491, "compression_loss": 138.90382385253906, "distillation_loss": 6.162832260131836, "epoch": 1.48, "learning_rate": 4.2573964497041425e-05, "loss": 144.5456, "step": 1757, "task_loss": 3.0379414558410645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2948716640557136, "compression/movement_sparsity/importance_threshold": -0.000644109025626791, "compression/movement_sparsity/linear_layer_sparsity": 0.8233315442312212, "compression/movement_sparsity/model_sparsity": 0.7950475728227893, "compression_loss": 139.01109313964844, "distillation_loss": 4.725363731384277, "epoch": 1.49, "learning_rate": 4.256973795435334e-05, "loss": 144.0043, "step": 1758, "task_loss": 2.153745174407959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2958788390249318, "compression/movement_sparsity/importance_threshold": -0.0006409464665143574, "compression/movement_sparsity/linear_layer_sparsity": 0.8238902988024701, "compression/movement_sparsity/model_sparsity": 0.7955871324555857, "compression_loss": 139.1179656982422, "distillation_loss": 5.50393533706665, "epoch": 1.49, "learning_rate": 4.256551141166526e-05, "loss": 144.2977, "step": 1759, "task_loss": 2.245258331298828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2968827117840998, "compression/movement_sparsity/importance_threshold": -0.0006377942764388897, "compression/movement_sparsity/linear_layer_sparsity": 0.8245173073092668, "compression/movement_sparsity/model_sparsity": 0.79619260129127, "compression_loss": 139.22457885742188, "distillation_loss": 5.646256446838379, "epoch": 1.49, "learning_rate": 4.256128486897718e-05, "loss": 144.9921, "step": 1760, "task_loss": 2.5519707202911377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2978832877555648, "compression/movement_sparsity/importance_threshold": -0.0006346524383740544, "compression/movement_sparsity/linear_layer_sparsity": 0.825119275064028, "compression/movement_sparsity/model_sparsity": 0.7967738896017857, "compression_loss": 139.33065795898438, "distillation_loss": 5.85346794128418, "epoch": 1.49, "learning_rate": 4.25570583262891e-05, "loss": 145.2328, "step": 1761, "task_loss": 3.5156404972076416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2988805723616752, "compression/movement_sparsity/importance_threshold": -0.0006315209352935206, "compression/movement_sparsity/linear_layer_sparsity": 0.8257291604660995, "compression/movement_sparsity/model_sparsity": 0.7973628235640692, "compression_loss": 139.4365234375, "distillation_loss": 5.043483734130859, "epoch": 1.49, "learning_rate": 4.2552831783601016e-05, "loss": 145.1233, "step": 1762, "task_loss": 2.149935483932495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.2998745710247797, "compression/movement_sparsity/importance_threshold": -0.0006283997501709522, "compression/movement_sparsity/linear_layer_sparsity": 0.8262538953870833, "compression/movement_sparsity/model_sparsity": 0.7978695322262437, "compression_loss": 139.5419158935547, "distillation_loss": 6.715606212615967, "epoch": 1.49, "learning_rate": 4.2548605240912936e-05, "loss": 144.7239, "step": 1763, "task_loss": 2.6563217639923096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.300865289167226, "compression/movement_sparsity/importance_threshold": -0.0006252888659800167, "compression/movement_sparsity/linear_layer_sparsity": 0.8269647188681923, "compression/movement_sparsity/model_sparsity": 0.7985559367340279, "compression_loss": 139.64706420898438, "distillation_loss": 7.84239387512207, "epoch": 1.49, "learning_rate": 4.254437869822485e-05, "loss": 146.0125, "step": 1764, "task_loss": 2.8231847286224365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3018527322113624, "compression/movement_sparsity/importance_threshold": -0.0006221882656943815, "compression/movement_sparsity/linear_layer_sparsity": 0.8275511375083564, "compression/movement_sparsity/model_sparsity": 0.7991222100898676, "compression_loss": 139.75180053710938, "distillation_loss": 3.9311342239379883, "epoch": 1.49, "learning_rate": 4.254015215553677e-05, "loss": 144.7489, "step": 1765, "task_loss": 2.431652784347534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3028369055795375, "compression/movement_sparsity/importance_threshold": -0.000619097932287713, "compression/movement_sparsity/linear_layer_sparsity": 0.8281741633431626, "compression/movement_sparsity/model_sparsity": 0.7997238330705966, "compression_loss": 139.85621643066406, "distillation_loss": 7.11520528793335, "epoch": 1.49, "learning_rate": 4.2535925612848695e-05, "loss": 145.8988, "step": 1766, "task_loss": 3.0921268463134766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.303817814694099, "compression/movement_sparsity/importance_threshold": -0.000616017848733677, "compression/movement_sparsity/linear_layer_sparsity": 0.8288645726492792, "compression/movement_sparsity/model_sparsity": 0.8003905246931005, "compression_loss": 139.96034240722656, "distillation_loss": 4.946022987365723, "epoch": 1.49, "learning_rate": 4.2531699070160615e-05, "loss": 145.0384, "step": 1767, "task_loss": 2.514681339263916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3047954649773952, "compression/movement_sparsity/importance_threshold": -0.0006129479980059426, "compression/movement_sparsity/linear_layer_sparsity": 0.8296048249761135, "compression/movement_sparsity/model_sparsity": 0.8011053470752256, "compression_loss": 140.06410217285156, "distillation_loss": 7.6344475746154785, "epoch": 1.49, "learning_rate": 4.252747252747253e-05, "loss": 145.7643, "step": 1768, "task_loss": 4.450456619262695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3057698618517743, "compression/movement_sparsity/importance_threshold": -0.0006098883630781737, "compression/movement_sparsity/linear_layer_sparsity": 0.8301827297605856, "compression/movement_sparsity/model_sparsity": 0.801663399052508, "compression_loss": 140.16763305664062, "distillation_loss": 6.687248229980469, "epoch": 1.5, "learning_rate": 4.252324598478445e-05, "loss": 145.8839, "step": 1769, "task_loss": 3.056523323059082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3067410107395847, "compression/movement_sparsity/importance_threshold": -0.0006068389269240386, "compression/movement_sparsity/linear_layer_sparsity": 0.8309462819109804, "compression/movement_sparsity/model_sparsity": 0.8024007208375756, "compression_loss": 140.27076721191406, "distillation_loss": 5.196033477783203, "epoch": 1.5, "learning_rate": 4.251901944209637e-05, "loss": 146.5131, "step": 1770, "task_loss": 3.0600903034210205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3077089170631744, "compression/movement_sparsity/importance_threshold": -0.0006037996725172037, "compression/movement_sparsity/linear_layer_sparsity": 0.8313825633564415, "compression/movement_sparsity/model_sparsity": 0.8028220146732263, "compression_loss": 140.3734130859375, "distillation_loss": 5.396224021911621, "epoch": 1.5, "learning_rate": 4.251479289940829e-05, "loss": 145.3328, "step": 1771, "task_loss": 1.6985580921173096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3086735862448915, "compression/movement_sparsity/importance_threshold": -0.0006007705828313366, "compression/movement_sparsity/linear_layer_sparsity": 0.8321750673858561, "compression/movement_sparsity/model_sparsity": 0.8035872937512031, "compression_loss": 140.47586059570312, "distillation_loss": 5.913259506225586, "epoch": 1.5, "learning_rate": 4.251056635672021e-05, "loss": 146.2286, "step": 1772, "task_loss": 3.681518316268921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3096350237070846, "compression/movement_sparsity/importance_threshold": -0.000597751640840102, "compression/movement_sparsity/linear_layer_sparsity": 0.8328959429402822, "compression/movement_sparsity/model_sparsity": 0.804283405012662, "compression_loss": 140.57789611816406, "distillation_loss": 5.118448734283447, "epoch": 1.5, "learning_rate": 4.250633981403213e-05, "loss": 146.3774, "step": 1773, "task_loss": 2.3396239280700684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3105932348721014, "compression/movement_sparsity/importance_threshold": -0.0005947428295171682, "compression/movement_sparsity/linear_layer_sparsity": 0.8334564384400059, "compression/movement_sparsity/model_sparsity": 0.8048246457676844, "compression_loss": 140.67962646484375, "distillation_loss": 5.340882778167725, "epoch": 1.5, "learning_rate": 4.250211327134404e-05, "loss": 146.7904, "step": 1774, "task_loss": 4.274032115936279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3115482251622907, "compression/movement_sparsity/importance_threshold": -0.0005917441318362008, "compression/movement_sparsity/linear_layer_sparsity": 0.8340619476725551, "compression/movement_sparsity/model_sparsity": 0.8054093538953312, "compression_loss": 140.781005859375, "distillation_loss": 6.783961772918701, "epoch": 1.5, "learning_rate": 4.249788672865596e-05, "loss": 146.5327, "step": 1775, "task_loss": 3.5391533374786377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3125, "compression/movement_sparsity/importance_threshold": -0.0005887555307708681, "compression/movement_sparsity/linear_layer_sparsity": 0.834637336457656, "compression/movement_sparsity/model_sparsity": 0.805964976305561, "compression_loss": 140.882080078125, "distillation_loss": 6.131011486053467, "epoch": 1.5, "learning_rate": 4.249366018596788e-05, "loss": 146.594, "step": 1776, "task_loss": 2.7621350288391113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.313448564807578, "compression/movement_sparsity/importance_threshold": -0.0005857770092948349, "compression/movement_sparsity/linear_layer_sparsity": 0.8352930106634492, "compression/movement_sparsity/model_sparsity": 0.8065981260852954, "compression_loss": 140.9827423095703, "distillation_loss": 7.761905670166016, "epoch": 1.5, "learning_rate": 4.24894336432798e-05, "loss": 147.1191, "step": 1777, "task_loss": 3.130160093307495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3143939250073724, "compression/movement_sparsity/importance_threshold": -0.0005828085503817712, "compression/movement_sparsity/linear_layer_sparsity": 0.8360540348903053, "compression/movement_sparsity/model_sparsity": 0.8073330067887745, "compression_loss": 141.08303833007812, "distillation_loss": 6.2124176025390625, "epoch": 1.5, "learning_rate": 4.248520710059172e-05, "loss": 146.2784, "step": 1778, "task_loss": 2.9587185382843018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.315336086021732, "compression/movement_sparsity/importance_threshold": -0.0005798501370053392, "compression/movement_sparsity/linear_layer_sparsity": 0.8366004002513805, "compression/movement_sparsity/model_sparsity": 0.8078606028188804, "compression_loss": 141.1830291748047, "distillation_loss": 3.60258412361145, "epoch": 1.5, "learning_rate": 4.248098055790364e-05, "loss": 146.1561, "step": 1779, "task_loss": 1.6228842735290527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3162750532730045, "compression/movement_sparsity/importance_threshold": -0.0005769017521392089, "compression/movement_sparsity/linear_layer_sparsity": 0.83709272528473, "compression/movement_sparsity/model_sparsity": 0.8083360149727654, "compression_loss": 141.28269958496094, "distillation_loss": 5.322476387023926, "epoch": 1.5, "learning_rate": 4.247675401521555e-05, "loss": 146.8022, "step": 1780, "task_loss": 3.44547176361084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3172108321835385, "compression/movement_sparsity/importance_threshold": -0.0005739633787570459, "compression/movement_sparsity/linear_layer_sparsity": 0.8376807894600279, "compression/movement_sparsity/model_sparsity": 0.8089038773345448, "compression_loss": 141.38189697265625, "distillation_loss": 7.39438533782959, "epoch": 1.51, "learning_rate": 4.247252747252747e-05, "loss": 147.0786, "step": 1781, "task_loss": 3.7741708755493164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3181434281756819, "compression/movement_sparsity/importance_threshold": -0.0005710349998325169, "compression/movement_sparsity/linear_layer_sparsity": 0.8382278106503231, "compression/movement_sparsity/model_sparsity": 0.8094321066641194, "compression_loss": 141.48077392578125, "distillation_loss": 4.936664581298828, "epoch": 1.51, "learning_rate": 4.246830092983939e-05, "loss": 147.7109, "step": 1782, "task_loss": 2.7555670738220215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.319072846671783, "compression/movement_sparsity/importance_threshold": -0.0005681165983392892, "compression/movement_sparsity/linear_layer_sparsity": 0.838723498298946, "compression/movement_sparsity/model_sparsity": 0.8099107659170984, "compression_loss": 141.5792999267578, "distillation_loss": 6.903846263885498, "epoch": 1.51, "learning_rate": 4.246407438715132e-05, "loss": 147.6862, "step": 1783, "task_loss": 3.432710886001587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3199990930941898, "compression/movement_sparsity/importance_threshold": -0.0005652081572510284, "compression/movement_sparsity/linear_layer_sparsity": 0.8394581343428238, "compression/movement_sparsity/model_sparsity": 0.8106201649528643, "compression_loss": 141.677490234375, "distillation_loss": 7.227783203125, "epoch": 1.51, "learning_rate": 4.245984784446323e-05, "loss": 147.7478, "step": 1784, "task_loss": 4.365713119506836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.320922172865251, "compression/movement_sparsity/importance_threshold": -0.0005623096595414028, "compression/movement_sparsity/linear_layer_sparsity": 0.8399994557809891, "compression/movement_sparsity/model_sparsity": 0.8111428903343291, "compression_loss": 141.775390625, "distillation_loss": 5.524361610412598, "epoch": 1.51, "learning_rate": 4.245562130177515e-05, "loss": 147.1765, "step": 1785, "task_loss": 1.9421035051345825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3218420914073143, "compression/movement_sparsity/importance_threshold": -0.0005594210881840773, "compression/movement_sparsity/linear_layer_sparsity": 0.8406164599111392, "compression/movement_sparsity/model_sparsity": 0.8117386984744818, "compression_loss": 141.87286376953125, "distillation_loss": 5.094821929931641, "epoch": 1.51, "learning_rate": 4.245139475908707e-05, "loss": 146.8701, "step": 1786, "task_loss": 3.685307741165161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.322758854142728, "compression/movement_sparsity/importance_threshold": -0.00055654242615272, "compression/movement_sparsity/linear_layer_sparsity": 0.8412501936484824, "compression/movement_sparsity/model_sparsity": 0.8123506615083542, "compression_loss": 141.97003173828125, "distillation_loss": 4.70139217376709, "epoch": 1.51, "learning_rate": 4.244716821639898e-05, "loss": 148.1169, "step": 1787, "task_loss": 2.0995371341705322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3236724664938404, "compression/movement_sparsity/importance_threshold": -0.0005536736564209976, "compression/movement_sparsity/linear_layer_sparsity": 0.8417558022045782, "compression/movement_sparsity/model_sparsity": 0.8128389008551143, "compression_loss": 142.0667266845703, "distillation_loss": 5.7059197425842285, "epoch": 1.51, "learning_rate": 4.244294167371091e-05, "loss": 148.1606, "step": 1788, "task_loss": 4.309201717376709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3245829338829997, "compression/movement_sparsity/importance_threshold": -0.0005508147619625757, "compression/movement_sparsity/linear_layer_sparsity": 0.8422418193532484, "compression/movement_sparsity/model_sparsity": 0.813308221819564, "compression_loss": 142.16311645507812, "distillation_loss": 6.39350700378418, "epoch": 1.51, "learning_rate": 4.243871513102283e-05, "loss": 148.4218, "step": 1789, "task_loss": 3.282238006591797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3254902617325541, "compression/movement_sparsity/importance_threshold": -0.0005479657257511216, "compression/movement_sparsity/linear_layer_sparsity": 0.84278895978522, "compression/movement_sparsity/model_sparsity": 0.8138365662944965, "compression_loss": 142.25926208496094, "distillation_loss": 10.267997741699219, "epoch": 1.51, "learning_rate": 4.243448858833474e-05, "loss": 149.5215, "step": 1790, "task_loss": 4.538952350616455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3263944554648517, "compression/movement_sparsity/importance_threshold": -0.0005451265307603028, "compression/movement_sparsity/linear_layer_sparsity": 0.8432978236390805, "compression/movement_sparsity/model_sparsity": 0.8143279491095285, "compression_loss": 142.35498046875, "distillation_loss": 6.726561546325684, "epoch": 1.51, "learning_rate": 4.243026204564666e-05, "loss": 148.6764, "step": 1791, "task_loss": 3.5487115383148193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3272955205022408, "compression/movement_sparsity/importance_threshold": -0.0005422971599637859, "compression/movement_sparsity/linear_layer_sparsity": 0.8439652550741626, "compression/movement_sparsity/model_sparsity": 0.8149724522215563, "compression_loss": 142.4505157470703, "distillation_loss": 6.282662868499756, "epoch": 1.51, "learning_rate": 4.242603550295858e-05, "loss": 148.6727, "step": 1792, "task_loss": 3.161579132080078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3281934622670695, "compression/movement_sparsity/importance_threshold": -0.0005394775963352355, "compression/movement_sparsity/linear_layer_sparsity": 0.8444908485352162, "compression/movement_sparsity/model_sparsity": 0.8154799899303081, "compression_loss": 142.545654296875, "distillation_loss": 5.848675727844238, "epoch": 1.52, "learning_rate": 4.24218089602705e-05, "loss": 149.2476, "step": 1793, "task_loss": 1.8755099773406982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.329088286181686, "compression/movement_sparsity/importance_threshold": -0.000536667822848321, "compression/movement_sparsity/linear_layer_sparsity": 0.8450572464979197, "compression/movement_sparsity/model_sparsity": 0.8160269303805489, "compression_loss": 142.64051818847656, "distillation_loss": 6.196131706237793, "epoch": 1.52, "learning_rate": 4.241758241758242e-05, "loss": 148.3279, "step": 1794, "task_loss": 2.8414669036865234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3299799976684386, "compression/movement_sparsity/importance_threshold": -0.0005338678224767078, "compression/movement_sparsity/linear_layer_sparsity": 0.8454806617665015, "compression/movement_sparsity/model_sparsity": 0.8164358000320772, "compression_loss": 142.7350311279297, "distillation_loss": 6.473362445831299, "epoch": 1.52, "learning_rate": 4.241335587489434e-05, "loss": 149.2629, "step": 1795, "task_loss": 3.872361183166504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3308686021496752, "compression/movement_sparsity/importance_threshold": -0.0005310775781940625, "compression/movement_sparsity/linear_layer_sparsity": 0.8459983852769154, "compression/movement_sparsity/model_sparsity": 0.8169357381472045, "compression_loss": 142.8291473388672, "distillation_loss": 7.025053024291992, "epoch": 1.52, "learning_rate": 4.240912933220626e-05, "loss": 149.7013, "step": 1796, "task_loss": 3.680112361907959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3317541050477444, "compression/movement_sparsity/importance_threshold": -0.0005282970729740526, "compression/movement_sparsity/linear_layer_sparsity": 0.8464915092294966, "compression/movement_sparsity/model_sparsity": 0.8174119217749878, "compression_loss": 142.92312622070312, "distillation_loss": 6.706751346588135, "epoch": 1.52, "learning_rate": 4.2404902789518173e-05, "loss": 148.0347, "step": 1797, "task_loss": 2.7960166931152344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3326365117849943, "compression/movement_sparsity/importance_threshold": -0.0005255262897903446, "compression/movement_sparsity/linear_layer_sparsity": 0.8469740922178876, "compression/movement_sparsity/model_sparsity": 0.8178779265531286, "compression_loss": 143.01654052734375, "distillation_loss": 5.169501304626465, "epoch": 1.52, "learning_rate": 4.240067624683009e-05, "loss": 148.3957, "step": 1798, "task_loss": 3.5008366107940674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3335158277837729, "compression/movement_sparsity/importance_threshold": -0.000522765211616605, "compression/movement_sparsity/linear_layer_sparsity": 0.8475131719125373, "compression/movement_sparsity/model_sparsity": 0.818398487201864, "compression_loss": 143.10958862304688, "distillation_loss": 6.25333309173584, "epoch": 1.52, "learning_rate": 4.239644970414201e-05, "loss": 148.8576, "step": 1799, "task_loss": 3.4381887912750244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3343920584664284, "compression/movement_sparsity/importance_threshold": -0.0005200138214265003, "compression/movement_sparsity/linear_layer_sparsity": 0.8480324336405762, "compression/movement_sparsity/model_sparsity": 0.8188999106921089, "compression_loss": 143.20242309570312, "distillation_loss": 7.622268199920654, "epoch": 1.52, "learning_rate": 4.239222316145393e-05, "loss": 149.2285, "step": 1800, "task_loss": 3.2173707485198975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3352652092553092, "compression/movement_sparsity/importance_threshold": -0.0005172721021936971, "compression/movement_sparsity/linear_layer_sparsity": 0.8484014031597327, "compression/movement_sparsity/model_sparsity": 0.8192562049731994, "compression_loss": 143.29476928710938, "distillation_loss": 6.684247970581055, "epoch": 1.52, "learning_rate": 4.238799661876585e-05, "loss": 149.5587, "step": 1801, "task_loss": 3.334637403488159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3361352855727633, "compression/movement_sparsity/importance_threshold": -0.0005145400368918637, "compression/movement_sparsity/linear_layer_sparsity": 0.8489656189997589, "compression/movement_sparsity/model_sparsity": 0.8198010382633897, "compression_loss": 143.3867950439453, "distillation_loss": 6.105194091796875, "epoch": 1.52, "learning_rate": 4.238377007607777e-05, "loss": 149.8265, "step": 1802, "task_loss": 3.4169492721557617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3370022928411394, "compression/movement_sparsity/importance_threshold": -0.0005118176084946647, "compression/movement_sparsity/linear_layer_sparsity": 0.8496480510377269, "compression/movement_sparsity/model_sparsity": 0.820460026661447, "compression_loss": 143.478515625, "distillation_loss": 5.205389022827148, "epoch": 1.52, "learning_rate": 4.2379543533389685e-05, "loss": 150.1192, "step": 1803, "task_loss": 3.669997215270996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.337866236482785, "compression/movement_sparsity/importance_threshold": -0.0005091047999757677, "compression/movement_sparsity/linear_layer_sparsity": 0.8500777622668205, "compression/movement_sparsity/model_sparsity": 0.820874975987875, "compression_loss": 143.5699462890625, "distillation_loss": 8.611461639404297, "epoch": 1.52, "learning_rate": 4.2375316990701605e-05, "loss": 150.6223, "step": 1804, "task_loss": 3.927917718887329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3387271219200483, "compression/movement_sparsity/importance_threshold": -0.0005064015943088401, "compression/movement_sparsity/linear_layer_sparsity": 0.8506815663433978, "compression/movement_sparsity/model_sparsity": 0.821458037536903, "compression_loss": 143.66094970703125, "distillation_loss": 7.84056282043457, "epoch": 1.53, "learning_rate": 4.237109044801353e-05, "loss": 149.9805, "step": 1805, "task_loss": 4.580684185028076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.339584954575278, "compression/movement_sparsity/importance_threshold": -0.0005037079744675465, "compression/movement_sparsity/linear_layer_sparsity": 0.8511092146914903, "compression/movement_sparsity/model_sparsity": 0.8218709948486385, "compression_loss": 143.75173950195312, "distillation_loss": 5.58791971206665, "epoch": 1.53, "learning_rate": 4.2366863905325444e-05, "loss": 149.2539, "step": 1806, "task_loss": 3.2603187561035156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3404397398708223, "compression/movement_sparsity/importance_threshold": -0.0005010239234255563, "compression/movement_sparsity/linear_layer_sparsity": 0.8517961540648248, "compression/movement_sparsity/model_sparsity": 0.8225343357412261, "compression_loss": 143.84222412109375, "distillation_loss": 6.129647254943848, "epoch": 1.53, "learning_rate": 4.2362637362637364e-05, "loss": 149.8162, "step": 1807, "task_loss": 3.5768771171569824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3412914832290288, "compression/movement_sparsity/importance_threshold": -0.0004983494241565349, "compression/movement_sparsity/linear_layer_sparsity": 0.8523260759987301, "compression/movement_sparsity/model_sparsity": 0.8230460532264714, "compression_loss": 143.9322509765625, "distillation_loss": 6.287062644958496, "epoch": 1.53, "learning_rate": 4.2358410819949284e-05, "loss": 149.6768, "step": 1808, "task_loss": 3.7027273178100586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3421401900722463, "compression/movement_sparsity/importance_threshold": -0.000495684459634149, "compression/movement_sparsity/linear_layer_sparsity": 0.8529144144298836, "compression/movement_sparsity/model_sparsity": 0.8236141804225741, "compression_loss": 144.0220947265625, "distillation_loss": 5.887596130371094, "epoch": 1.53, "learning_rate": 4.23541842772612e-05, "loss": 149.5868, "step": 1809, "task_loss": 3.946030855178833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3429858658228226, "compression/movement_sparsity/importance_threshold": -0.000493029012832066, "compression/movement_sparsity/linear_layer_sparsity": 0.8534624253260926, "compression/movement_sparsity/model_sparsity": 0.8241433654586195, "compression_loss": 144.11154174804688, "distillation_loss": 7.010804176330566, "epoch": 1.53, "learning_rate": 4.234995773457312e-05, "loss": 150.4849, "step": 1810, "task_loss": 3.3192989826202393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3438285159031063, "compression/movement_sparsity/importance_threshold": -0.0004903830667239515, "compression/movement_sparsity/linear_layer_sparsity": 0.8539953879227451, "compression/movement_sparsity/model_sparsity": 0.8246580191504923, "compression_loss": 144.20071411132812, "distillation_loss": 6.429386615753174, "epoch": 1.53, "learning_rate": 4.234573119188504e-05, "loss": 150.6865, "step": 1811, "task_loss": 3.55954647064209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.344668145735445, "compression/movement_sparsity/importance_threshold": -0.0004877466042834729, "compression/movement_sparsity/linear_layer_sparsity": 0.854504239852438, "compression/movement_sparsity/model_sparsity": 0.8251493904509886, "compression_loss": 144.2896270751953, "distillation_loss": 6.856687545776367, "epoch": 1.53, "learning_rate": 4.234150464919696e-05, "loss": 150.9271, "step": 1812, "task_loss": 3.9256112575531006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3455047607421875, "compression/movement_sparsity/importance_threshold": -0.0004851196084842968, "compression/movement_sparsity/linear_layer_sparsity": 0.8549617463162907, "compression/movement_sparsity/model_sparsity": 0.8255911801603535, "compression_loss": 144.37806701660156, "distillation_loss": 6.978229999542236, "epoch": 1.53, "learning_rate": 4.2337278106508876e-05, "loss": 150.3833, "step": 1813, "task_loss": 2.405771255493164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3463383663456816, "compression/movement_sparsity/importance_threshold": -0.00048250206230008967, "compression/movement_sparsity/linear_layer_sparsity": 0.8555096260466557, "compression/movement_sparsity/model_sparsity": 0.8261202385365053, "compression_loss": 144.4663543701172, "distillation_loss": 6.950952529907227, "epoch": 1.53, "learning_rate": 4.2333051563820795e-05, "loss": 150.616, "step": 1814, "task_loss": 3.5087249279022217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3471689679682757, "compression/movement_sparsity/importance_threshold": -0.0004798939487045189, "compression/movement_sparsity/linear_layer_sparsity": 0.8560141018068262, "compression/movement_sparsity/model_sparsity": 0.8266073840023649, "compression_loss": 144.5541534423828, "distillation_loss": 7.087459564208984, "epoch": 1.53, "learning_rate": 4.2328825021132715e-05, "loss": 150.6183, "step": 1815, "task_loss": 4.267423152923584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.347996571032318, "compression/movement_sparsity/importance_threshold": -0.0004772952506712511, "compression/movement_sparsity/linear_layer_sparsity": 0.8564571084828337, "compression/movement_sparsity/model_sparsity": 0.8270351720362037, "compression_loss": 144.64161682128906, "distillation_loss": 6.650256633758545, "epoch": 1.53, "learning_rate": 4.2324598478444635e-05, "loss": 150.562, "step": 1816, "task_loss": 3.4272704124450684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3488211809601565, "compression/movement_sparsity/importance_threshold": -0.0004747059511739527, "compression/movement_sparsity/linear_layer_sparsity": 0.8570149806656776, "compression/movement_sparsity/model_sparsity": 0.8275738795933513, "compression_loss": 144.72889709472656, "distillation_loss": 5.877427577972412, "epoch": 1.54, "learning_rate": 4.2320371935756555e-05, "loss": 150.7755, "step": 1817, "task_loss": 2.9719197750091553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3496428031741394, "compression/movement_sparsity/importance_threshold": -0.0004721260331862903, "compression/movement_sparsity/linear_layer_sparsity": 0.8574862953156528, "compression/movement_sparsity/model_sparsity": 0.8280290031351663, "compression_loss": 144.81565856933594, "distillation_loss": 7.523372650146484, "epoch": 1.54, "learning_rate": 4.2316145393068474e-05, "loss": 151.1628, "step": 1818, "task_loss": 3.132990598678589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3504614430966153, "compression/movement_sparsity/importance_threshold": -0.00046955547968193043, "compression/movement_sparsity/linear_layer_sparsity": 0.85798281765601, "compression/movement_sparsity/model_sparsity": 0.828508468405651, "compression_loss": 144.90219116210938, "distillation_loss": 5.5037713050842285, "epoch": 1.54, "learning_rate": 4.231191885038039e-05, "loss": 151.1072, "step": 1819, "task_loss": 2.1953752040863037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3512771061499318, "compression/movement_sparsity/importance_threshold": -0.00046699427363454046, "compression/movement_sparsity/linear_layer_sparsity": 0.8583481264557024, "compression/movement_sparsity/model_sparsity": 0.8288612277242525, "compression_loss": 144.9883575439453, "distillation_loss": 6.959022521972656, "epoch": 1.54, "learning_rate": 4.230769230769231e-05, "loss": 150.8487, "step": 1820, "task_loss": 2.932663917541504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3520897977564377, "compression/movement_sparsity/importance_threshold": -0.00046444239801778694, "compression/movement_sparsity/linear_layer_sparsity": 0.858797452940557, "compression/movement_sparsity/model_sparsity": 0.8292951184620624, "compression_loss": 145.07412719726562, "distillation_loss": 7.074075222015381, "epoch": 1.54, "learning_rate": 4.230346576500423e-05, "loss": 151.7975, "step": 1821, "task_loss": 2.8492836952209473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3528995233384806, "compression/movement_sparsity/importance_threshold": -0.0004618998358053364, "compression/movement_sparsity/linear_layer_sparsity": 0.8594212657704271, "compression/movement_sparsity/model_sparsity": 0.8298975014021538, "compression_loss": 145.1597137451172, "distillation_loss": 5.904569625854492, "epoch": 1.54, "learning_rate": 4.2299239222316147e-05, "loss": 150.8993, "step": 1822, "task_loss": 3.1637818813323975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3537062883184092, "compression/movement_sparsity/importance_threshold": -0.0004593665699708545, "compression/movement_sparsity/linear_layer_sparsity": 0.8598171600601054, "compression/movement_sparsity/model_sparsity": 0.8302797955050684, "compression_loss": 145.24488830566406, "distillation_loss": 6.740392684936523, "epoch": 1.54, "learning_rate": 4.2295012679628066e-05, "loss": 151.5313, "step": 1823, "task_loss": 2.514648675918579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3545100981185716, "compression/movement_sparsity/importance_threshold": -0.00045684258348801035, "compression/movement_sparsity/linear_layer_sparsity": 0.8601760775059449, "compression/movement_sparsity/model_sparsity": 0.830626383032484, "compression_loss": 145.329833984375, "distillation_loss": 8.003349304199219, "epoch": 1.54, "learning_rate": 4.2290786136939986e-05, "loss": 151.9314, "step": 1824, "task_loss": 3.5564355850219727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3553109581613154, "compression/movement_sparsity/importance_threshold": -0.00045432785933046966, "compression/movement_sparsity/linear_layer_sparsity": 0.8606724925287935, "compression/movement_sparsity/model_sparsity": 0.8311057446721466, "compression_loss": 145.41448974609375, "distillation_loss": 6.623943328857422, "epoch": 1.54, "learning_rate": 4.2286559594251906e-05, "loss": 151.7437, "step": 1825, "task_loss": 3.1305036544799805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.35610887386899, "compression/movement_sparsity/importance_threshold": -0.00045182238047189807, "compression/movement_sparsity/linear_layer_sparsity": 0.8610900053343956, "compression/movement_sparsity/model_sparsity": 0.8315089146284567, "compression_loss": 145.49874877929688, "distillation_loss": 5.9034013748168945, "epoch": 1.54, "learning_rate": 4.228233305156382e-05, "loss": 152.0808, "step": 1826, "task_loss": 2.798334836959839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3569038506639421, "compression/movement_sparsity/importance_threshold": -0.000449326129885963, "compression/movement_sparsity/linear_layer_sparsity": 0.8614956058965295, "compression/movement_sparsity/model_sparsity": 0.8319005815635079, "compression_loss": 145.58274841308594, "distillation_loss": 6.358174800872803, "epoch": 1.54, "learning_rate": 4.2278106508875745e-05, "loss": 151.3871, "step": 1827, "task_loss": 2.62842059135437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.357695893968521, "compression/movement_sparsity/importance_threshold": -0.0004468390905463318, "compression/movement_sparsity/linear_layer_sparsity": 0.8619283935608731, "compression/movement_sparsity/model_sparsity": 0.8323185016401707, "compression_loss": 145.66644287109375, "distillation_loss": 6.892963409423828, "epoch": 1.54, "learning_rate": 4.2273879966187665e-05, "loss": 152.408, "step": 1828, "task_loss": 3.1583328247070312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3584850092050746, "compression/movement_sparsity/importance_threshold": -0.00044436124542667106, "compression/movement_sparsity/linear_layer_sparsity": 0.8624150903870985, "compression/movement_sparsity/model_sparsity": 0.8327884789331608, "compression_loss": 145.749755859375, "distillation_loss": 5.919205665588379, "epoch": 1.55, "learning_rate": 4.226965342349958e-05, "loss": 151.6775, "step": 1829, "task_loss": 2.1881580352783203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3592712017959512, "compression/movement_sparsity/importance_threshold": -0.0004418925775006464, "compression/movement_sparsity/linear_layer_sparsity": 0.8628325435718625, "compression/movement_sparsity/model_sparsity": 0.8331915913167919, "compression_loss": 145.8328857421875, "distillation_loss": 5.807865142822266, "epoch": 1.55, "learning_rate": 4.22654268808115e-05, "loss": 151.2798, "step": 1830, "task_loss": 2.874177932739258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3600544771634988, "compression/movement_sparsity/importance_threshold": -0.00043943306974192527, "compression/movement_sparsity/linear_layer_sparsity": 0.8632254806679671, "compression/movement_sparsity/model_sparsity": 0.8335710298148293, "compression_loss": 145.91561889648438, "distillation_loss": 5.8735809326171875, "epoch": 1.55, "learning_rate": 4.226120033812342e-05, "loss": 152.3776, "step": 1831, "task_loss": 3.535463809967041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3608348407300652, "compression/movement_sparsity/importance_threshold": -0.000436982705124175, "compression/movement_sparsity/linear_layer_sparsity": 0.8637796802071791, "compression/movement_sparsity/model_sparsity": 0.8341061908949522, "compression_loss": 145.99807739257812, "distillation_loss": 7.100095272064209, "epoch": 1.55, "learning_rate": 4.225697379543534e-05, "loss": 152.7719, "step": 1832, "task_loss": 3.687058687210083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3616122979179996, "compression/movement_sparsity/importance_threshold": -0.00043454146662106133, "compression/movement_sparsity/linear_layer_sparsity": 0.8642018553623269, "compression/movement_sparsity/model_sparsity": 0.8345138630347579, "compression_loss": 146.08018493652344, "distillation_loss": 6.087064266204834, "epoch": 1.55, "learning_rate": 4.225274725274726e-05, "loss": 152.1026, "step": 1833, "task_loss": 2.3889381885528564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3623868541496493, "compression/movement_sparsity/importance_threshold": -0.00043210933720625073, "compression/movement_sparsity/linear_layer_sparsity": 0.8645782774862557, "compression/movement_sparsity/model_sparsity": 0.83487735390072, "compression_loss": 146.1621551513672, "distillation_loss": 7.033417224884033, "epoch": 1.55, "learning_rate": 4.224852071005918e-05, "loss": 152.6333, "step": 1834, "task_loss": 3.7293763160705566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3631585148473628, "compression/movement_sparsity/importance_threshold": -0.0004296862998534115, "compression/movement_sparsity/linear_layer_sparsity": 0.8650280690136481, "compression/movement_sparsity/model_sparsity": 0.8353116937054259, "compression_loss": 146.24349975585938, "distillation_loss": 6.198794364929199, "epoch": 1.55, "learning_rate": 4.224429416737109e-05, "loss": 152.2939, "step": 1835, "task_loss": 3.011843204498291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3639272854334887, "compression/movement_sparsity/importance_threshold": -0.0004272723375362093, "compression/movement_sparsity/linear_layer_sparsity": 0.8653568183153688, "compression/movement_sparsity/model_sparsity": 0.8356291494572814, "compression_loss": 146.32472229003906, "distillation_loss": 7.574608325958252, "epoch": 1.55, "learning_rate": 4.224006762468301e-05, "loss": 152.8366, "step": 1836, "task_loss": 4.515462398529053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3646931713303745, "compression/movement_sparsity/importance_threshold": -0.00042486743322831064, "compression/movement_sparsity/linear_layer_sparsity": 0.8657347428844199, "compression/movement_sparsity/model_sparsity": 0.8359940911547535, "compression_loss": 146.40557861328125, "distillation_loss": 6.0002121925354, "epoch": 1.55, "learning_rate": 4.223584108199493e-05, "loss": 152.0068, "step": 1837, "task_loss": 2.5071210861206055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3654561779603689, "compression/movement_sparsity/importance_threshold": -0.0004224715699033821, "compression/movement_sparsity/linear_layer_sparsity": 0.8661620931283216, "compression/movement_sparsity/model_sparsity": 0.836406760603094, "compression_loss": 146.4863739013672, "distillation_loss": 7.250365257263184, "epoch": 1.55, "learning_rate": 4.223161453930685e-05, "loss": 153.3079, "step": 1838, "task_loss": 4.441134929656982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3662163107458198, "compression/movement_sparsity/importance_threshold": -0.0004200847305350919, "compression/movement_sparsity/linear_layer_sparsity": 0.8666021306865879, "compression/movement_sparsity/model_sparsity": 0.83683168151752, "compression_loss": 146.5667724609375, "distillation_loss": 6.808737277984619, "epoch": 1.55, "learning_rate": 4.222738799661877e-05, "loss": 152.6478, "step": 1839, "task_loss": 3.1851916313171387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3669735751090755, "compression/movement_sparsity/importance_threshold": -0.0004177068980971057, "compression/movement_sparsity/linear_layer_sparsity": 0.8669859934911216, "compression/movement_sparsity/model_sparsity": 0.8372023574538179, "compression_loss": 146.6467742919922, "distillation_loss": 4.923266887664795, "epoch": 1.56, "learning_rate": 4.222316145393069e-05, "loss": 151.9591, "step": 1840, "task_loss": 2.7223432064056396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3677279764724841, "compression/movement_sparsity/importance_threshold": -0.0004153380555630892, "compression/movement_sparsity/linear_layer_sparsity": 0.867440268505545, "compression/movement_sparsity/model_sparsity": 0.8376410267239824, "compression_loss": 146.72659301757812, "distillation_loss": 7.620291709899902, "epoch": 1.56, "learning_rate": 4.221893491124261e-05, "loss": 153.3653, "step": 1841, "task_loss": 4.143599510192871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3684795202583941, "compression/movement_sparsity/importance_threshold": -0.0004129781859067115, "compression/movement_sparsity/linear_layer_sparsity": 0.8678263849777619, "compression/movement_sparsity/model_sparsity": 0.8380138789075455, "compression_loss": 146.80592346191406, "distillation_loss": 6.786708831787109, "epoch": 1.56, "learning_rate": 4.221470836855452e-05, "loss": 152.9798, "step": 1842, "task_loss": 3.249237060546875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3692282118891534, "compression/movement_sparsity/importance_threshold": -0.00041062727210163744, "compression/movement_sparsity/linear_layer_sparsity": 0.8682289448771486, "compression/movement_sparsity/model_sparsity": 0.8384026096359692, "compression_loss": 146.8849639892578, "distillation_loss": 9.060035705566406, "epoch": 1.56, "learning_rate": 4.221048182586644e-05, "loss": 153.3529, "step": 1843, "task_loss": 3.531567096710205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3699740567871104, "compression/movement_sparsity/importance_threshold": -0.0004082852971215335, "compression/movement_sparsity/linear_layer_sparsity": 0.8686887527053552, "compression/movement_sparsity/model_sparsity": 0.8388466216507425, "compression_loss": 146.9639129638672, "distillation_loss": 5.924881935119629, "epoch": 1.56, "learning_rate": 4.220625528317837e-05, "loss": 154.1292, "step": 1844, "task_loss": 2.439697742462158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3707170603746128, "compression/movement_sparsity/importance_threshold": -0.0004059522439400689, "compression/movement_sparsity/linear_layer_sparsity": 0.8691704652295087, "compression/movement_sparsity/model_sparsity": 0.8393117858677703, "compression_loss": 147.04232788085938, "distillation_loss": 5.721198081970215, "epoch": 1.56, "learning_rate": 4.220202874049028e-05, "loss": 153.2371, "step": 1845, "task_loss": 3.8636693954467773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3714572280740096, "compression/movement_sparsity/importance_threshold": -0.00040362809553090746, "compression/movement_sparsity/linear_layer_sparsity": 0.8696560411839763, "compression/movement_sparsity/model_sparsity": 0.8397806807943956, "compression_loss": 147.12051391601562, "distillation_loss": 7.845920562744141, "epoch": 1.56, "learning_rate": 4.21978021978022e-05, "loss": 154.077, "step": 1846, "task_loss": 3.2476134300231934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3721945653076486, "compression/movement_sparsity/importance_threshold": -0.00040131283486771665, "compression/movement_sparsity/linear_layer_sparsity": 0.8700802315234546, "compression/movement_sparsity/model_sparsity": 0.8401902988907507, "compression_loss": 147.19830322265625, "distillation_loss": 7.6389031410217285, "epoch": 1.56, "learning_rate": 4.219357565511412e-05, "loss": 154.2532, "step": 1847, "task_loss": 3.4175102710723877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.372929077497878, "compression/movement_sparsity/importance_threshold": -0.00039900644492416384, "compression/movement_sparsity/linear_layer_sparsity": 0.870514366618741, "compression/movement_sparsity/model_sparsity": 0.8406095201099583, "compression_loss": 147.27589416503906, "distillation_loss": 8.966110229492188, "epoch": 1.56, "learning_rate": 4.218934911242603e-05, "loss": 154.0425, "step": 1848, "task_loss": 3.729973316192627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3736607700670453, "compression/movement_sparsity/importance_threshold": -0.00039670890867391645, "compression/movement_sparsity/linear_layer_sparsity": 0.8709058131938912, "compression/movement_sparsity/model_sparsity": 0.8409875192910214, "compression_loss": 147.35313415527344, "distillation_loss": 5.036543846130371, "epoch": 1.56, "learning_rate": 4.218512256973796e-05, "loss": 153.0647, "step": 1849, "task_loss": 3.1140565872192383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3743896484375, "compression/movement_sparsity/importance_threshold": -0.00039442020909064013, "compression/movement_sparsity/linear_layer_sparsity": 0.8712969616648504, "compression/movement_sparsity/model_sparsity": 0.8413652306086898, "compression_loss": 147.43003845214844, "distillation_loss": 6.887899398803711, "epoch": 1.56, "learning_rate": 4.218089602704988e-05, "loss": 153.4423, "step": 1850, "task_loss": 3.7076354026794434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3751157180315896, "compression/movement_sparsity/importance_threshold": -0.0003921403291480014, "compression/movement_sparsity/linear_layer_sparsity": 0.871736951526446, "compression/movement_sparsity/model_sparsity": 0.8417901054649726, "compression_loss": 147.50662231445312, "distillation_loss": 6.127902984619141, "epoch": 1.56, "learning_rate": 4.217666948436179e-05, "loss": 152.6885, "step": 1851, "task_loss": 3.397775173187256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.375838984271662, "compression/movement_sparsity/importance_threshold": -0.0003898692518196677, "compression/movement_sparsity/linear_layer_sparsity": 0.8721504458875549, "compression/movement_sparsity/model_sparsity": 0.8421893950227198, "compression_loss": 147.582763671875, "distillation_loss": 5.996698379516602, "epoch": 1.57, "learning_rate": 4.217244294167371e-05, "loss": 152.813, "step": 1852, "task_loss": 2.6034128665924072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.376559452580066, "compression/movement_sparsity/importance_threshold": -0.00038760696007930553, "compression/movement_sparsity/linear_layer_sparsity": 0.8724252621790587, "compression/movement_sparsity/model_sparsity": 0.8424547705291766, "compression_loss": 147.65869140625, "distillation_loss": 5.135795593261719, "epoch": 1.57, "learning_rate": 4.216821639898563e-05, "loss": 153.8605, "step": 1853, "task_loss": 3.0036847591400146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3772771283791496, "compression/movement_sparsity/importance_threshold": -0.00038535343690058056, "compression/movement_sparsity/linear_layer_sparsity": 0.8728179011709724, "compression/movement_sparsity/model_sparsity": 0.8428339211638193, "compression_loss": 147.73423767089844, "distillation_loss": 6.802160263061523, "epoch": 1.57, "learning_rate": 4.216398985629755e-05, "loss": 154.3701, "step": 1854, "task_loss": 3.7523281574249268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.377992017091261, "compression/movement_sparsity/importance_threshold": -0.00038310866525716105, "compression/movement_sparsity/linear_layer_sparsity": 0.873237751113431, "compression/movement_sparsity/model_sparsity": 0.843239347969145, "compression_loss": 147.8097686767578, "distillation_loss": 6.210305213928223, "epoch": 1.57, "learning_rate": 4.215976331360947e-05, "loss": 154.1215, "step": 1855, "task_loss": 3.5660030841827393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3787041241387479, "compression/movement_sparsity/importance_threshold": -0.00038087262812271355, "compression/movement_sparsity/linear_layer_sparsity": 0.8734228261193073, "compression/movement_sparsity/model_sparsity": 0.8434180650792121, "compression_loss": 147.88473510742188, "distillation_loss": 5.39631986618042, "epoch": 1.57, "learning_rate": 4.215553677092139e-05, "loss": 153.3718, "step": 1856, "task_loss": 2.5087478160858154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3794134549439594, "compression/movement_sparsity/importance_threshold": -0.0003786453084709037, "compression/movement_sparsity/linear_layer_sparsity": 0.8739739134507664, "compression/movement_sparsity/model_sparsity": 0.8439502208654927, "compression_loss": 147.95953369140625, "distillation_loss": 7.3603668212890625, "epoch": 1.57, "learning_rate": 4.215131022823331e-05, "loss": 154.5319, "step": 1857, "task_loss": 3.0146098136901855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.380120014929243, "compression/movement_sparsity/importance_threshold": -0.00037642668927539893, "compression/movement_sparsity/linear_layer_sparsity": 0.8743660516276394, "compression/movement_sparsity/model_sparsity": 0.8443288878896319, "compression_loss": 148.03399658203125, "distillation_loss": 6.592740535736084, "epoch": 1.57, "learning_rate": 4.214708368554522e-05, "loss": 154.4575, "step": 1858, "task_loss": 3.2644922733306885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3808238095169467, "compression/movement_sparsity/importance_threshold": -0.0003742167535098666, "compression/movement_sparsity/linear_layer_sparsity": 0.8747877617402493, "compression/movement_sparsity/model_sparsity": 0.8447361109625416, "compression_loss": 148.10821533203125, "distillation_loss": 5.234306335449219, "epoch": 1.57, "learning_rate": 4.214285714285714e-05, "loss": 153.6809, "step": 1859, "task_loss": 3.560842752456665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.38152484412942, "compression/movement_sparsity/importance_threshold": -0.0003720154841479724, "compression/movement_sparsity/linear_layer_sparsity": 0.8751528201324212, "compression/movement_sparsity/model_sparsity": 0.8450886284758915, "compression_loss": 148.18199157714844, "distillation_loss": 6.694014549255371, "epoch": 1.57, "learning_rate": 4.213863060016906e-05, "loss": 155.3337, "step": 1860, "task_loss": 5.197516918182373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3822231241890095, "compression/movement_sparsity/importance_threshold": -0.0003698228641633829, "compression/movement_sparsity/linear_layer_sparsity": 0.8755148617101813, "compression/movement_sparsity/model_sparsity": 0.8454382328116853, "compression_loss": 148.2556915283203, "distillation_loss": 6.522852420806885, "epoch": 1.57, "learning_rate": 4.213440405748098e-05, "loss": 154.911, "step": 1861, "task_loss": 2.605754852294922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3829186551180643, "compression/movement_sparsity/importance_threshold": -0.0003676388765297663, "compression/movement_sparsity/linear_layer_sparsity": 0.8759651063559438, "compression/movement_sparsity/model_sparsity": 0.8458730101687514, "compression_loss": 148.3290252685547, "distillation_loss": 7.264030456542969, "epoch": 1.57, "learning_rate": 4.21301775147929e-05, "loss": 154.6723, "step": 1862, "task_loss": 3.095665693283081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3836114423389325, "compression/movement_sparsity/importance_threshold": -0.0003654635042207874, "compression/movement_sparsity/linear_layer_sparsity": 0.87637639474604, "compression/movement_sparsity/model_sparsity": 0.8462701695373767, "compression_loss": 148.40194702148438, "distillation_loss": 6.592911243438721, "epoch": 1.57, "learning_rate": 4.212595097210482e-05, "loss": 155.1815, "step": 1863, "task_loss": 3.470334768295288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3843014912739622, "compression/movement_sparsity/importance_threshold": -0.00036329673021011366, "compression/movement_sparsity/linear_layer_sparsity": 0.8767222910008211, "compression/movement_sparsity/model_sparsity": 0.8466041831917047, "compression_loss": 148.4746551513672, "distillation_loss": 7.3904948234558105, "epoch": 1.58, "learning_rate": 4.2121724429416735e-05, "loss": 155.475, "step": 1864, "task_loss": 4.322600364685059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3849888073455014, "compression/movement_sparsity/importance_threshold": -0.0003611385374714124, "compression/movement_sparsity/linear_layer_sparsity": 0.8772139959774536, "compression/movement_sparsity/model_sparsity": 0.8470789965897284, "compression_loss": 148.54693603515625, "distillation_loss": 6.1083526611328125, "epoch": 1.58, "learning_rate": 4.2117497886728655e-05, "loss": 155.6581, "step": 1865, "task_loss": 3.5356814861297607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3856733959758984, "compression/movement_sparsity/importance_threshold": -0.00035898890897835024, "compression/movement_sparsity/linear_layer_sparsity": 0.8775861969460395, "compression/movement_sparsity/model_sparsity": 0.8474384113100192, "compression_loss": 148.61912536621094, "distillation_loss": 7.036923885345459, "epoch": 1.58, "learning_rate": 4.211327134404058e-05, "loss": 154.8982, "step": 1866, "task_loss": 2.5222835540771484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3863552625875017, "compression/movement_sparsity/importance_threshold": -0.00035684782770459365, "compression/movement_sparsity/linear_layer_sparsity": 0.8780465055892867, "compression/movement_sparsity/model_sparsity": 0.8478829069352959, "compression_loss": 148.69093322753906, "distillation_loss": 7.396998405456543, "epoch": 1.58, "learning_rate": 4.2109044801352494e-05, "loss": 155.8431, "step": 1867, "task_loss": 3.3267486095428467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3870344126026593, "compression/movement_sparsity/importance_threshold": -0.00035471527662380917, "compression/movement_sparsity/linear_layer_sparsity": 0.8783268427704057, "compression/movement_sparsity/model_sparsity": 0.8481536136718255, "compression_loss": 148.76242065429688, "distillation_loss": 8.70544147491455, "epoch": 1.58, "learning_rate": 4.2104818258664414e-05, "loss": 155.9333, "step": 1868, "task_loss": 3.489133596420288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3877108514437193, "compression/movement_sparsity/importance_threshold": -0.00035259123870966334, "compression/movement_sparsity/linear_layer_sparsity": 0.8786594197299377, "compression/movement_sparsity/model_sparsity": 0.848474765589671, "compression_loss": 148.83370971679688, "distillation_loss": 6.990805625915527, "epoch": 1.58, "learning_rate": 4.2100591715976334e-05, "loss": 155.0529, "step": 1869, "task_loss": 3.1875967979431152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3883845845330298, "compression/movement_sparsity/importance_threshold": -0.00035047569693582355, "compression/movement_sparsity/linear_layer_sparsity": 0.8789720952536852, "compression/movement_sparsity/model_sparsity": 0.8487766997472755, "compression_loss": 148.90463256835938, "distillation_loss": 5.877851486206055, "epoch": 1.58, "learning_rate": 4.209636517328825e-05, "loss": 155.1167, "step": 1870, "task_loss": 2.9592432975769043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3890556172929394, "compression/movement_sparsity/importance_threshold": -0.00034836863427595635, "compression/movement_sparsity/linear_layer_sparsity": 0.8793267199991758, "compression/movement_sparsity/model_sparsity": 0.8491191420418052, "compression_loss": 148.975341796875, "distillation_loss": 6.457624912261963, "epoch": 1.58, "learning_rate": 4.2092138630600166e-05, "loss": 155.3938, "step": 1871, "task_loss": 2.821925640106201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3897239551457958, "compression/movement_sparsity/importance_threshold": -0.00034627003370372826, "compression/movement_sparsity/linear_layer_sparsity": 0.8797472019225192, "compression/movement_sparsity/model_sparsity": 0.849525179117528, "compression_loss": 149.04559326171875, "distillation_loss": 4.93093729019165, "epoch": 1.58, "learning_rate": 4.208791208791209e-05, "loss": 154.349, "step": 1872, "task_loss": 2.7775089740753174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3903896035139476, "compression/movement_sparsity/importance_threshold": -0.0003441798781928058, "compression/movement_sparsity/linear_layer_sparsity": 0.8800460811843122, "compression/movement_sparsity/model_sparsity": 0.8498137909572182, "compression_loss": 149.11572265625, "distillation_loss": 7.825369834899902, "epoch": 1.58, "learning_rate": 4.208368554522401e-05, "loss": 155.5625, "step": 1873, "task_loss": 3.8700406551361084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.391052567819743, "compression/movement_sparsity/importance_threshold": -0.00034209815071685554, "compression/movement_sparsity/linear_layer_sparsity": 0.8803635144509464, "compression/movement_sparsity/model_sparsity": 0.8501203194146046, "compression_loss": 149.18551635742188, "distillation_loss": 6.081006050109863, "epoch": 1.58, "learning_rate": 4.2079459002535926e-05, "loss": 155.2744, "step": 1874, "task_loss": 2.857639789581299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3917128534855299, "compression/movement_sparsity/importance_threshold": -0.00034002483424954485, "compression/movement_sparsity/linear_layer_sparsity": 0.8807055472754135, "compression/movement_sparsity/model_sparsity": 0.8504506023593353, "compression_loss": 149.25497436523438, "distillation_loss": 7.600732326507568, "epoch": 1.58, "learning_rate": 4.2075232459847845e-05, "loss": 155.9755, "step": 1875, "task_loss": 4.325234889984131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3923704659336564, "compression/movement_sparsity/importance_threshold": -0.00033795991176454026, "compression/movement_sparsity/linear_layer_sparsity": 0.8810463161381112, "compression/movement_sparsity/model_sparsity": 0.8507796647632717, "compression_loss": 149.323974609375, "distillation_loss": 4.604580402374268, "epoch": 1.59, "learning_rate": 4.2071005917159765e-05, "loss": 154.7705, "step": 1876, "task_loss": 1.841259241104126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3930254105864712, "compression/movement_sparsity/importance_threshold": -0.0003359033662355083, "compression/movement_sparsity/linear_layer_sparsity": 0.8814063306073732, "compression/movement_sparsity/model_sparsity": 0.8511273116279805, "compression_loss": 149.39285278320312, "distillation_loss": 6.943325042724609, "epoch": 1.59, "learning_rate": 4.2066779374471685e-05, "loss": 155.9174, "step": 1877, "task_loss": 2.5194525718688965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3936776928663221, "compression/movement_sparsity/importance_threshold": -0.0003338551806361164, "compression/movement_sparsity/linear_layer_sparsity": 0.8817211524812952, "compression/movement_sparsity/model_sparsity": 0.8514313184020279, "compression_loss": 149.46141052246094, "distillation_loss": 6.574117660522461, "epoch": 1.59, "learning_rate": 4.2062552831783605e-05, "loss": 155.6478, "step": 1878, "task_loss": 3.3019862174987793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3943273181955573, "compression/movement_sparsity/importance_threshold": -0.0003318153379400302, "compression/movement_sparsity/linear_layer_sparsity": 0.8820229770124942, "compression/movement_sparsity/model_sparsity": 0.8517227743320593, "compression_loss": 149.5296173095703, "distillation_loss": 7.748133659362793, "epoch": 1.59, "learning_rate": 4.2058326289095524e-05, "loss": 156.5978, "step": 1879, "task_loss": 3.981105089187622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3949742919965253, "compression/movement_sparsity/importance_threshold": -0.0003297838211209171, "compression/movement_sparsity/linear_layer_sparsity": 0.882394820256051, "compression/movement_sparsity/model_sparsity": 0.8520818436162763, "compression_loss": 149.5975799560547, "distillation_loss": 6.929703235626221, "epoch": 1.59, "learning_rate": 4.205409974640744e-05, "loss": 156.2494, "step": 1880, "task_loss": 2.9411325454711914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.395618619691574, "compression/movement_sparsity/importance_threshold": -0.0003277606131524436, "compression/movement_sparsity/linear_layer_sparsity": 0.8827588650939739, "compression/movement_sparsity/model_sparsity": 0.8524333823940836, "compression_loss": 149.66529846191406, "distillation_loss": 5.461925506591797, "epoch": 1.59, "learning_rate": 4.204987320371936e-05, "loss": 155.5635, "step": 1881, "task_loss": 3.765636682510376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3962603067030517, "compression/movement_sparsity/importance_threshold": -0.0003257456970082763, "compression/movement_sparsity/linear_layer_sparsity": 0.883219245282227, "compression/movement_sparsity/model_sparsity": 0.852877947106575, "compression_loss": 149.73281860351562, "distillation_loss": 4.841838359832764, "epoch": 1.59, "learning_rate": 4.204564666103128e-05, "loss": 155.8555, "step": 1882, "task_loss": 1.7235074043273926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3968993584533067, "compression/movement_sparsity/importance_threshold": -0.0003237390556620817, "compression/movement_sparsity/linear_layer_sparsity": 0.8835713421041786, "compression/movement_sparsity/model_sparsity": 0.8532179483195163, "compression_loss": 149.7999725341797, "distillation_loss": 6.933751583099365, "epoch": 1.59, "learning_rate": 4.20414201183432e-05, "loss": 155.8288, "step": 1883, "task_loss": 3.108952045440674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3975357803646873, "compression/movement_sparsity/importance_threshold": -0.00032174067208752717, "compression/movement_sparsity/linear_layer_sparsity": 0.8839133033836399, "compression/movement_sparsity/model_sparsity": 0.8535481621770321, "compression_loss": 149.86683654785156, "distillation_loss": 7.251405715942383, "epoch": 1.59, "learning_rate": 4.2037193575655116e-05, "loss": 156.1975, "step": 1884, "task_loss": 3.388500452041626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.398169577859541, "compression/movement_sparsity/importance_threshold": -0.0003197505292582793, "compression/movement_sparsity/linear_layer_sparsity": 0.8841942486973086, "compression/movement_sparsity/model_sparsity": 0.8538194561548873, "compression_loss": 149.9334259033203, "distillation_loss": 6.155484199523926, "epoch": 1.59, "learning_rate": 4.2032967032967036e-05, "loss": 156.3457, "step": 1885, "task_loss": 3.1280503273010254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3988007563602167, "compression/movement_sparsity/importance_threshold": -0.00031776861014800543, "compression/movement_sparsity/linear_layer_sparsity": 0.8845195399904149, "compression/movement_sparsity/model_sparsity": 0.8541335726913624, "compression_loss": 149.99986267089844, "distillation_loss": 6.894721508026123, "epoch": 1.59, "learning_rate": 4.2028740490278956e-05, "loss": 155.4189, "step": 1886, "task_loss": 3.0255980491638184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.3994293212890625, "compression/movement_sparsity/importance_threshold": -0.0003157948977303704, "compression/movement_sparsity/linear_layer_sparsity": 0.8848357331436151, "compression/movement_sparsity/model_sparsity": 0.8544389036370262, "compression_loss": 150.06594848632812, "distillation_loss": 7.085342884063721, "epoch": 1.59, "learning_rate": 4.202451394759087e-05, "loss": 156.295, "step": 1887, "task_loss": 3.3206844329833984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4000552780684263, "compression/movement_sparsity/importance_threshold": -0.00031382937497904245, "compression/movement_sparsity/linear_layer_sparsity": 0.8852122745092204, "compression/movement_sparsity/model_sparsity": 0.8548025096483461, "compression_loss": 150.13168334960938, "distillation_loss": 7.8692169189453125, "epoch": 1.6, "learning_rate": 4.202028740490279e-05, "loss": 156.692, "step": 1888, "task_loss": 3.3173069953918457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4006786321206566, "compression/movement_sparsity/importance_threshold": -0.00031187202486768813, "compression/movement_sparsity/linear_layer_sparsity": 0.8855141467370898, "compression/movement_sparsity/model_sparsity": 0.8550940116365208, "compression_loss": 150.19728088378906, "distillation_loss": 5.269217014312744, "epoch": 1.6, "learning_rate": 4.2016060862214715e-05, "loss": 156.0347, "step": 1889, "task_loss": 3.3313422203063965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4012993888681013, "compression/movement_sparsity/importance_threshold": -0.00030992283036997397, "compression/movement_sparsity/linear_layer_sparsity": 0.8858086379051927, "compression/movement_sparsity/model_sparsity": 0.8553783861270385, "compression_loss": 150.26242065429688, "distillation_loss": 7.1392107009887695, "epoch": 1.6, "learning_rate": 4.201183431952663e-05, "loss": 157.0173, "step": 1890, "task_loss": 4.142314434051514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4019175537331088, "compression/movement_sparsity/importance_threshold": -0.00030798177445956563, "compression/movement_sparsity/linear_layer_sparsity": 0.8862039598348245, "compression/movement_sparsity/model_sparsity": 0.855760127532235, "compression_loss": 150.327392578125, "distillation_loss": 6.651957035064697, "epoch": 1.6, "learning_rate": 4.200760777683855e-05, "loss": 156.3176, "step": 1891, "task_loss": 3.3759889602661133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4025331321380274, "compression/movement_sparsity/importance_threshold": -0.00030604884011013224, "compression/movement_sparsity/linear_layer_sparsity": 0.8864758189327545, "compression/movement_sparsity/model_sparsity": 0.8560226474338147, "compression_loss": 150.3920135498047, "distillation_loss": 5.683810234069824, "epoch": 1.6, "learning_rate": 4.200338123415047e-05, "loss": 156.3803, "step": 1892, "task_loss": 2.293818473815918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.403146129505205, "compression/movement_sparsity/importance_threshold": -0.0003041240102953386, "compression/movement_sparsity/linear_layer_sparsity": 0.886789090664884, "compression/movement_sparsity/model_sparsity": 0.8563251573182089, "compression_loss": 150.45632934570312, "distillation_loss": 5.966579437255859, "epoch": 1.6, "learning_rate": 4.199915469146238e-05, "loss": 156.61, "step": 1893, "task_loss": 3.467686891555786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.40375655125699, "compression/movement_sparsity/importance_threshold": -0.00030220726798885125, "compression/movement_sparsity/linear_layer_sparsity": 0.8871445143296062, "compression/movement_sparsity/model_sparsity": 0.8566683710866367, "compression_loss": 150.5203399658203, "distillation_loss": 7.3996686935424805, "epoch": 1.6, "learning_rate": 4.199492814877431e-05, "loss": 156.2879, "step": 1894, "task_loss": 4.114287376403809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4043644028157305, "compression/movement_sparsity/importance_threshold": -0.0003002985961643393, "compression/movement_sparsity/linear_layer_sparsity": 0.8874655725432017, "compression/movement_sparsity/model_sparsity": 0.8569783999629048, "compression_loss": 150.5841522216797, "distillation_loss": 5.302109718322754, "epoch": 1.6, "learning_rate": 4.1990701606086227e-05, "loss": 156.9972, "step": 1895, "task_loss": 2.8223211765289307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4049696896037747, "compression/movement_sparsity/importance_threshold": -0.0002983979777954667, "compression/movement_sparsity/linear_layer_sparsity": 0.8877548051533772, "compression/movement_sparsity/model_sparsity": 0.8572576965431371, "compression_loss": 150.64776611328125, "distillation_loss": 5.442483901977539, "epoch": 1.6, "learning_rate": 4.198647506339814e-05, "loss": 156.8532, "step": 1896, "task_loss": 3.177159070968628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.405572417043471, "compression/movement_sparsity/importance_threshold": -0.00029650539585590087, "compression/movement_sparsity/linear_layer_sparsity": 0.8880860227577986, "compression/movement_sparsity/model_sparsity": 0.8575775358039021, "compression_loss": 150.71109008789062, "distillation_loss": 6.852133750915527, "epoch": 1.6, "learning_rate": 4.198224852071006e-05, "loss": 156.6172, "step": 1897, "task_loss": 3.7614424228668213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4061725905571674, "compression/movement_sparsity/importance_threshold": -0.00029462083331930917, "compression/movement_sparsity/linear_layer_sparsity": 0.8883290492183851, "compression/movement_sparsity/model_sparsity": 0.8578122135579307, "compression_loss": 150.77406311035156, "distillation_loss": 7.884014129638672, "epoch": 1.6, "learning_rate": 4.197802197802198e-05, "loss": 157.232, "step": 1898, "task_loss": 3.245591163635254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4067702155672117, "compression/movement_sparsity/importance_threshold": -0.0002927442731593599, "compression/movement_sparsity/linear_layer_sparsity": 0.8886465898025282, "compression/movement_sparsity/model_sparsity": 0.8581188456461393, "compression_loss": 150.8367462158203, "distillation_loss": 6.228991508483887, "epoch": 1.6, "learning_rate": 4.19737954353339e-05, "loss": 157.7381, "step": 1899, "task_loss": 2.4144697189331055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4073652974959527, "compression/movement_sparsity/importance_threshold": -0.00029087569834971606, "compression/movement_sparsity/linear_layer_sparsity": 0.8889545195075567, "compression/movement_sparsity/model_sparsity": 0.8584161970184975, "compression_loss": 150.89923095703125, "distillation_loss": 5.907983779907227, "epoch": 1.61, "learning_rate": 4.196956889264582e-05, "loss": 156.7937, "step": 1900, "task_loss": 3.688079357147217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4079578417657388, "compression/movement_sparsity/importance_threshold": -0.00028901509186404686, "compression/movement_sparsity/linear_layer_sparsity": 0.8893004157623378, "compression/movement_sparsity/model_sparsity": 0.8587502106728255, "compression_loss": 150.96142578125, "distillation_loss": 5.798238277435303, "epoch": 1.61, "learning_rate": 4.196534234995774e-05, "loss": 157.1843, "step": 1901, "task_loss": 3.2603585720062256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4085478537989173, "compression/movement_sparsity/importance_threshold": -0.0002871624366760179, "compression/movement_sparsity/linear_layer_sparsity": 0.8897274321295456, "compression/movement_sparsity/model_sparsity": 0.8591625577141638, "compression_loss": 151.02337646484375, "distillation_loss": 8.634401321411133, "epoch": 1.61, "learning_rate": 4.196111580726966e-05, "loss": 158.0282, "step": 1902, "task_loss": 3.356994390487671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4091353390178372, "compression/movement_sparsity/importance_threshold": -0.00028531771575929753, "compression/movement_sparsity/linear_layer_sparsity": 0.8899948077404446, "compression/movement_sparsity/model_sparsity": 0.8594207481502848, "compression_loss": 151.08494567871094, "distillation_loss": 6.6648406982421875, "epoch": 1.61, "learning_rate": 4.195688926458157e-05, "loss": 156.7914, "step": 1903, "task_loss": 3.902116298675537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4097203028448462, "compression/movement_sparsity/importance_threshold": -0.00028348091208755047, "compression/movement_sparsity/linear_layer_sparsity": 0.8903204090619096, "compression/movement_sparsity/model_sparsity": 0.8597351640646905, "compression_loss": 151.14620971679688, "distillation_loss": 7.1224775314331055, "epoch": 1.61, "learning_rate": 4.195266272189349e-05, "loss": 157.1437, "step": 1904, "task_loss": 3.330153226852417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.410302750702293, "compression/movement_sparsity/importance_threshold": -0.000281652008634445, "compression/movement_sparsity/linear_layer_sparsity": 0.8906827487438604, "compression/movement_sparsity/model_sparsity": 0.8600850562638792, "compression_loss": 151.20745849609375, "distillation_loss": 5.975131988525391, "epoch": 1.61, "learning_rate": 4.194843617920541e-05, "loss": 157.2221, "step": 1905, "task_loss": 3.449862003326416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4108826880125251, "compression/movement_sparsity/importance_threshold": -0.0002798309883736468, "compression/movement_sparsity/linear_layer_sparsity": 0.8909932182965955, "compression/movement_sparsity/model_sparsity": 0.8603848602323617, "compression_loss": 151.2682647705078, "distillation_loss": 6.480030059814453, "epoch": 1.61, "learning_rate": 4.194420963651733e-05, "loss": 157.373, "step": 1906, "task_loss": 3.391955852508545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4114601201978914, "compression/movement_sparsity/importance_threshold": -0.0002780178342788233, "compression/movement_sparsity/linear_layer_sparsity": 0.8912539163736183, "compression/movement_sparsity/model_sparsity": 0.8606366025284378, "compression_loss": 151.32887268066406, "distillation_loss": 8.780585289001465, "epoch": 1.61, "learning_rate": 4.193998309382925e-05, "loss": 157.9215, "step": 1907, "task_loss": 3.4508187770843506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4120350526807395, "compression/movement_sparsity/importance_threshold": -0.000276212529323641, "compression/movement_sparsity/linear_layer_sparsity": 0.8915470362624431, "compression/movement_sparsity/model_sparsity": 0.8609196528473392, "compression_loss": 151.38922119140625, "distillation_loss": 8.277044296264648, "epoch": 1.61, "learning_rate": 4.193575655114117e-05, "loss": 157.7565, "step": 1908, "task_loss": 3.0446062088012695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.412607490883418, "compression/movement_sparsity/importance_threshold": -0.0002744150564817664, "compression/movement_sparsity/linear_layer_sparsity": 0.8918629074631171, "compression/movement_sparsity/model_sparsity": 0.8612246729005365, "compression_loss": 151.4491729736328, "distillation_loss": 6.879632949829102, "epoch": 1.61, "learning_rate": 4.193153000845308e-05, "loss": 157.7787, "step": 1909, "task_loss": 2.9869284629821777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.413177440228275, "compression/movement_sparsity/importance_threshold": -0.00027262539872686606, "compression/movement_sparsity/linear_layer_sparsity": 0.8921619894357599, "compression/movement_sparsity/model_sparsity": 0.8615134804873352, "compression_loss": 151.50900268554688, "distillation_loss": 6.028229713439941, "epoch": 1.61, "learning_rate": 4.1927303465765e-05, "loss": 157.0191, "step": 1910, "task_loss": 2.8621063232421875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4137449061376586, "compression/movement_sparsity/importance_threshold": -0.0002708435390326074, "compression/movement_sparsity/linear_layer_sparsity": 0.892503068326816, "compression/movement_sparsity/model_sparsity": 0.8618428422692023, "compression_loss": 151.56849670410156, "distillation_loss": 7.064677715301514, "epoch": 1.61, "learning_rate": 4.192307692307693e-05, "loss": 158.5267, "step": 1911, "task_loss": 3.214648723602295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4143098940339172, "compression/movement_sparsity/importance_threshold": -0.00026906946037265605, "compression/movement_sparsity/linear_layer_sparsity": 0.8926854484708066, "compression/movement_sparsity/model_sparsity": 0.8620189570941797, "compression_loss": 151.6277313232422, "distillation_loss": 6.206345081329346, "epoch": 1.62, "learning_rate": 4.191885038038885e-05, "loss": 157.3912, "step": 1912, "task_loss": 3.416571617126465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4148724093393985, "compression/movement_sparsity/importance_threshold": -0.0002673031457206803, "compression/movement_sparsity/linear_layer_sparsity": 0.8929668707511806, "compression/movement_sparsity/model_sparsity": 0.8622907116534667, "compression_loss": 151.686767578125, "distillation_loss": 6.52351188659668, "epoch": 1.62, "learning_rate": 4.191462383770076e-05, "loss": 158.6139, "step": 1913, "task_loss": 3.0527193546295166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4154324574764514, "compression/movement_sparsity/importance_threshold": -0.0002655445780503458, "compression/movement_sparsity/linear_layer_sparsity": 0.8932720936701558, "compression/movement_sparsity/model_sparsity": 0.8625854492261996, "compression_loss": 151.7454376220703, "distillation_loss": 5.9349365234375, "epoch": 1.62, "learning_rate": 4.191039729501268e-05, "loss": 157.3392, "step": 1914, "task_loss": 3.1626975536346436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4159900438674236, "compression/movement_sparsity/importance_threshold": -0.0002637937403353191, "compression/movement_sparsity/linear_layer_sparsity": 0.893549151705175, "compression/movement_sparsity/model_sparsity": 0.8628529894653858, "compression_loss": 151.8039093017578, "distillation_loss": 6.987100124359131, "epoch": 1.62, "learning_rate": 4.19061707523246e-05, "loss": 158.3103, "step": 1915, "task_loss": 3.1438724994659424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4165451739346633, "compression/movement_sparsity/importance_threshold": -0.00026205061554926846, "compression/movement_sparsity/linear_layer_sparsity": 0.8937929413124903, "compression/movement_sparsity/model_sparsity": 0.8630884041497051, "compression_loss": 151.862060546875, "distillation_loss": 8.266741752624512, "epoch": 1.62, "learning_rate": 4.190194420963652e-05, "loss": 158.4448, "step": 1916, "task_loss": 4.059313774108887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.417097853100519, "compression/movement_sparsity/importance_threshold": -0.00026031518666585866, "compression/movement_sparsity/linear_layer_sparsity": 0.8941232526801713, "compression/movement_sparsity/model_sparsity": 0.8634073683057497, "compression_loss": 151.92002868652344, "distillation_loss": 7.328000068664551, "epoch": 1.62, "learning_rate": 4.189771766694844e-05, "loss": 158.5577, "step": 1917, "task_loss": 3.6128668785095215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4176480867873387, "compression/movement_sparsity/importance_threshold": -0.0002585874366587571, "compression/movement_sparsity/linear_layer_sparsity": 0.8943218258438114, "compression/movement_sparsity/model_sparsity": 0.8635991198703362, "compression_loss": 151.97763061523438, "distillation_loss": 7.261575698852539, "epoch": 1.62, "learning_rate": 4.189349112426036e-05, "loss": 158.444, "step": 1918, "task_loss": 3.3322975635528564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4181958804174704, "compression/movement_sparsity/importance_threshold": -0.00025686734850163036, "compression/movement_sparsity/linear_layer_sparsity": 0.8946176763670247, "compression/movement_sparsity/model_sparsity": 0.8638848070179346, "compression_loss": 152.03500366210938, "distillation_loss": 6.789047718048096, "epoch": 1.62, "learning_rate": 4.188926458157227e-05, "loss": 158.5068, "step": 1919, "task_loss": 2.7582054138183594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4187412394132628, "compression/movement_sparsity/importance_threshold": -0.0002551549051681458, "compression/movement_sparsity/linear_layer_sparsity": 0.8949093772799008, "compression/movement_sparsity/model_sparsity": 0.8641664871070764, "compression_loss": 152.0921630859375, "distillation_loss": 6.755656719207764, "epoch": 1.62, "learning_rate": 4.188503803888419e-05, "loss": 158.2574, "step": 1920, "task_loss": 2.849137783050537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4192841691970637, "compression/movement_sparsity/importance_threshold": -0.00025345008963196993, "compression/movement_sparsity/linear_layer_sparsity": 0.8950742527458019, "compression/movement_sparsity/model_sparsity": 0.8643256985935075, "compression_loss": 152.14906311035156, "distillation_loss": 5.173419952392578, "epoch": 1.62, "learning_rate": 4.188081149619611e-05, "loss": 158.2758, "step": 1921, "task_loss": 3.542970895767212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4198246751912214, "compression/movement_sparsity/importance_threshold": -0.00025175288486676845, "compression/movement_sparsity/linear_layer_sparsity": 0.8953493313689936, "compression/movement_sparsity/model_sparsity": 0.8645913274197518, "compression_loss": 152.20555114746094, "distillation_loss": 5.921302795410156, "epoch": 1.62, "learning_rate": 4.187658495350803e-05, "loss": 158.864, "step": 1922, "task_loss": 2.270059823989868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4203627628180842, "compression/movement_sparsity/importance_threshold": -0.0002500632738462096, "compression/movement_sparsity/linear_layer_sparsity": 0.8955927990237827, "compression/movement_sparsity/model_sparsity": 0.8648264312116047, "compression_loss": 152.26182556152344, "distillation_loss": 5.613929748535156, "epoch": 1.63, "learning_rate": 4.187235841081995e-05, "loss": 158.1929, "step": 1923, "task_loss": 2.4784929752349854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4208984375, "compression/movement_sparsity/importance_threshold": -0.00024838123954395996, "compression/movement_sparsity/linear_layer_sparsity": 0.8958665302160316, "compression/movement_sparsity/model_sparsity": 0.8650907588953042, "compression_loss": 152.31796264648438, "distillation_loss": 7.734485626220703, "epoch": 1.63, "learning_rate": 4.186813186813187e-05, "loss": 159.5032, "step": 1924, "task_loss": 3.4364078044891357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4214317046593172, "compression/movement_sparsity/importance_threshold": -0.00024670676493368515, "compression/movement_sparsity/linear_layer_sparsity": 0.8960579250307548, "compression/movement_sparsity/model_sparsity": 0.8652755787093424, "compression_loss": 152.37364196777344, "distillation_loss": 6.083627700805664, "epoch": 1.63, "learning_rate": 4.1863905325443785e-05, "loss": 158.3182, "step": 1925, "task_loss": 3.2751035690307617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4219625697183842, "compression/movement_sparsity/importance_threshold": -0.0002450398329890526, "compression/movement_sparsity/linear_layer_sparsity": 0.896364555001511, "compression/movement_sparsity/model_sparsity": 0.8655716749972989, "compression_loss": 152.42921447753906, "distillation_loss": 6.687776565551758, "epoch": 1.63, "learning_rate": 4.1859678782755705e-05, "loss": 158.7509, "step": 1926, "task_loss": 2.933615207672119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4224910380995488, "compression/movement_sparsity/importance_threshold": -0.00024338042668372879, "compression/movement_sparsity/linear_layer_sparsity": 0.8966406591031194, "compression/movement_sparsity/model_sparsity": 0.8658382940736216, "compression_loss": 152.4844970703125, "distillation_loss": 5.581253528594971, "epoch": 1.63, "learning_rate": 4.1855452240067624e-05, "loss": 158.3622, "step": 1927, "task_loss": 2.1447255611419678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4230171152251594, "compression/movement_sparsity/importance_threshold": -0.0002417285289913803, "compression/movement_sparsity/linear_layer_sparsity": 0.8968729418886661, "compression/movement_sparsity/model_sparsity": 0.8660625972308992, "compression_loss": 152.53945922851562, "distillation_loss": 5.564861297607422, "epoch": 1.63, "learning_rate": 4.185122569737955e-05, "loss": 158.6945, "step": 1928, "task_loss": 4.0267014503479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4235408065175643, "compression/movement_sparsity/importance_threshold": -0.0002400841228856745, "compression/movement_sparsity/linear_layer_sparsity": 0.8972512122585785, "compression/movement_sparsity/model_sparsity": 0.8664278728499094, "compression_loss": 152.59423828125, "distillation_loss": 7.034060955047607, "epoch": 1.63, "learning_rate": 4.1846999154691464e-05, "loss": 160.2622, "step": 1929, "task_loss": 2.7290186882019043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4240621173991113, "compression/movement_sparsity/importance_threshold": -0.00023844719134027796, "compression/movement_sparsity/linear_layer_sparsity": 0.8975155949034009, "compression/movement_sparsity/model_sparsity": 0.866683173137546, "compression_loss": 152.64881896972656, "distillation_loss": 6.278230667114258, "epoch": 1.63, "learning_rate": 4.1842772612003383e-05, "loss": 158.2031, "step": 1930, "task_loss": 3.3503847122192383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.424581053292149, "compression/movement_sparsity/importance_threshold": -0.00023681771732885631, "compression/movement_sparsity/linear_layer_sparsity": 0.8977801683349054, "compression/movement_sparsity/model_sparsity": 0.8669386576577552, "compression_loss": 152.7030029296875, "distillation_loss": 5.02880334854126, "epoch": 1.63, "learning_rate": 4.18385460693153e-05, "loss": 158.8585, "step": 1931, "task_loss": 3.569124698638916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4250976196190255, "compression/movement_sparsity/importance_threshold": -0.00023519568382507784, "compression/movement_sparsity/linear_layer_sparsity": 0.8980975062081986, "compression/movement_sparsity/model_sparsity": 0.8672450939988554, "compression_loss": 152.757080078125, "distillation_loss": 8.632002830505371, "epoch": 1.63, "learning_rate": 4.1834319526627216e-05, "loss": 159.2811, "step": 1932, "task_loss": 4.659265041351318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.425611821802089, "compression/movement_sparsity/importance_threshold": -0.0002335810738026082, "compression/movement_sparsity/linear_layer_sparsity": 0.8982784435279052, "compression/movement_sparsity/model_sparsity": 0.8674198155650017, "compression_loss": 152.81076049804688, "distillation_loss": 8.428467750549316, "epoch": 1.63, "learning_rate": 4.183009298393914e-05, "loss": 159.3197, "step": 1933, "task_loss": 4.047605991363525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4261236652636875, "compression/movement_sparsity/importance_threshold": -0.00023197387023511477, "compression/movement_sparsity/linear_layer_sparsity": 0.8985142797154073, "compression/movement_sparsity/model_sparsity": 0.8676475500539462, "compression_loss": 152.86431884765625, "distillation_loss": 8.082972526550293, "epoch": 1.63, "learning_rate": 4.182586644125106e-05, "loss": 159.943, "step": 1934, "task_loss": 3.781964063644409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4266331554261695, "compression/movement_sparsity/importance_threshold": -0.00023037405609626325, "compression/movement_sparsity/linear_layer_sparsity": 0.8987905030586921, "compression/movement_sparsity/model_sparsity": 0.8679142842756267, "compression_loss": 152.91769409179688, "distillation_loss": 7.441792964935303, "epoch": 1.64, "learning_rate": 4.1821639898562975e-05, "loss": 160.1229, "step": 1935, "task_loss": 3.8719069957733154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4271402977118828, "compression/movement_sparsity/importance_threshold": -0.0002287816143597219, "compression/movement_sparsity/linear_layer_sparsity": 0.8989635942760943, "compression/movement_sparsity/model_sparsity": 0.8680814292772203, "compression_loss": 152.97080993652344, "distillation_loss": 6.845407962799072, "epoch": 1.64, "learning_rate": 4.1817413355874895e-05, "loss": 159.2656, "step": 1936, "task_loss": 3.4963345527648926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.427645097543176, "compression/movement_sparsity/importance_threshold": -0.00022719652799915637, "compression/movement_sparsity/linear_layer_sparsity": 0.8991851453107687, "compression/movement_sparsity/model_sparsity": 0.8682953693522828, "compression_loss": 153.02357482910156, "distillation_loss": 7.393518447875977, "epoch": 1.64, "learning_rate": 4.1813186813186815e-05, "loss": 159.0549, "step": 1937, "task_loss": 3.1124753952026367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.428147560342397, "compression/movement_sparsity/importance_threshold": -0.00022561877998823407, "compression/movement_sparsity/linear_layer_sparsity": 0.8994629903408519, "compression/movement_sparsity/model_sparsity": 0.8685636695508314, "compression_loss": 153.07615661621094, "distillation_loss": 7.813177585601807, "epoch": 1.64, "learning_rate": 4.1808960270498735e-05, "loss": 159.6019, "step": 1938, "task_loss": 2.669848918914795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4286476915318942, "compression/movement_sparsity/importance_threshold": -0.00022404835330062066, "compression/movement_sparsity/linear_layer_sparsity": 0.8996588447942709, "compression/movement_sparsity/model_sparsity": 0.8687527958012567, "compression_loss": 153.12831115722656, "distillation_loss": 6.959723949432373, "epoch": 1.64, "learning_rate": 4.1804733727810654e-05, "loss": 160.1599, "step": 1939, "task_loss": 2.681326150894165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4291454965340158, "compression/movement_sparsity/importance_threshold": -0.00022248523090998354, "compression/movement_sparsity/linear_layer_sparsity": 0.899899069075463, "compression/movement_sparsity/model_sparsity": 0.8689847676393736, "compression_loss": 153.18040466308594, "distillation_loss": 6.1097941398620605, "epoch": 1.64, "learning_rate": 4.1800507185122574e-05, "loss": 159.2079, "step": 1940, "task_loss": 3.562547445297241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.42964098077111, "compression/movement_sparsity/importance_threshold": -0.00022092939578998924, "compression/movement_sparsity/linear_layer_sparsity": 0.9002211169949724, "compression/movement_sparsity/model_sparsity": 0.8692957522221125, "compression_loss": 153.23208618164062, "distillation_loss": 5.258612632751465, "epoch": 1.64, "learning_rate": 4.1796280642434494e-05, "loss": 159.4402, "step": 1941, "task_loss": 2.9537127017974854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4301341496655244, "compression/movement_sparsity/importance_threshold": -0.00021938083091430517, "compression/movement_sparsity/linear_layer_sparsity": 0.9004457802373996, "compression/movement_sparsity/model_sparsity": 0.8695126975910175, "compression_loss": 153.28353881835938, "distillation_loss": 5.468062877655029, "epoch": 1.64, "learning_rate": 4.179205409974641e-05, "loss": 159.5712, "step": 1942, "task_loss": 2.7087302207946777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4306250086396082, "compression/movement_sparsity/importance_threshold": -0.00021783951925659698, "compression/movement_sparsity/linear_layer_sparsity": 0.900659604411446, "compression/movement_sparsity/model_sparsity": 0.8697191762468852, "compression_loss": 153.334716796875, "distillation_loss": 7.427664756774902, "epoch": 1.64, "learning_rate": 4.1787827557058327e-05, "loss": 159.6785, "step": 1943, "task_loss": 3.4780051708221436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4311135631157088, "compression/movement_sparsity/importance_threshold": -0.00021630544379053207, "compression/movement_sparsity/linear_layer_sparsity": 0.9010169240188223, "compression/movement_sparsity/model_sparsity": 0.8700642208265044, "compression_loss": 153.3856201171875, "distillation_loss": 6.463655948638916, "epoch": 1.64, "learning_rate": 4.1783601014370246e-05, "loss": 159.6995, "step": 1944, "task_loss": 2.984783887863159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.431599818516175, "compression/movement_sparsity/importance_threshold": -0.00021477858748977698, "compression/movement_sparsity/linear_layer_sparsity": 0.9014293213565086, "compression/movement_sparsity/model_sparsity": 0.8704624510469586, "compression_loss": 153.43626403808594, "distillation_loss": 8.109355926513672, "epoch": 1.64, "learning_rate": 4.1779374471682166e-05, "loss": 159.4469, "step": 1945, "task_loss": 4.172676086425781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4320837802633544, "compression/movement_sparsity/importance_threshold": -0.00021325893332799824, "compression/movement_sparsity/linear_layer_sparsity": 0.9016254023691127, "compression/movement_sparsity/model_sparsity": 0.870651796073564, "compression_loss": 153.4867706298828, "distillation_loss": 7.0241312980651855, "epoch": 1.64, "learning_rate": 4.1775147928994086e-05, "loss": 159.8084, "step": 1946, "task_loss": 4.404529571533203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4325654537795955, "compression/movement_sparsity/importance_threshold": -0.00021174646427886325, "compression/movement_sparsity/linear_layer_sparsity": 0.9018634564517951, "compression/movement_sparsity/model_sparsity": 0.8708816722661662, "compression_loss": 153.5367431640625, "distillation_loss": 6.621567249298096, "epoch": 1.65, "learning_rate": 4.1770921386306005e-05, "loss": 159.4945, "step": 1947, "task_loss": 3.0116825103759766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4330448444872466, "compression/movement_sparsity/importance_threshold": -0.00021024116331603854, "compression/movement_sparsity/linear_layer_sparsity": 0.9020940340813698, "compression/movement_sparsity/model_sparsity": 0.8711043288448252, "compression_loss": 153.58668518066406, "distillation_loss": 7.074351787567139, "epoch": 1.65, "learning_rate": 4.176669484361792e-05, "loss": 160.0091, "step": 1948, "task_loss": 4.423213005065918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4335219578086558, "compression/movement_sparsity/importance_threshold": -0.00020874301341318978, "compression/movement_sparsity/linear_layer_sparsity": 0.9024408246487235, "compression/movement_sparsity/model_sparsity": 0.8714392060893379, "compression_loss": 153.63632202148438, "distillation_loss": 8.042926788330078, "epoch": 1.65, "learning_rate": 4.176246830092984e-05, "loss": 160.5751, "step": 1949, "task_loss": 3.651385545730591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.433996799166171, "compression/movement_sparsity/importance_threshold": -0.00020725199754398523, "compression/movement_sparsity/linear_layer_sparsity": 0.9026384200306174, "compression/movement_sparsity/model_sparsity": 0.8716300134619892, "compression_loss": 153.68568420410156, "distillation_loss": 9.074098587036133, "epoch": 1.65, "learning_rate": 4.1758241758241765e-05, "loss": 160.7414, "step": 1950, "task_loss": 3.487760066986084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.434469373982141, "compression/movement_sparsity/importance_threshold": -0.00020576809868208968, "compression/movement_sparsity/linear_layer_sparsity": 0.9027966298868101, "compression/movement_sparsity/model_sparsity": 0.8717827883229112, "compression_loss": 153.7347869873047, "distillation_loss": 6.59572696685791, "epoch": 1.65, "learning_rate": 4.175401521555368e-05, "loss": 160.0203, "step": 1951, "task_loss": 2.638840913772583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4349396876789133, "compression/movement_sparsity/importance_threshold": -0.00020429129980117228, "compression/movement_sparsity/linear_layer_sparsity": 0.9029612549451907, "compression/movement_sparsity/model_sparsity": 0.8719417580040907, "compression_loss": 153.78375244140625, "distillation_loss": 7.987120151519775, "epoch": 1.65, "learning_rate": 4.17497886728656e-05, "loss": 159.8054, "step": 1952, "task_loss": 2.8663811683654785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4354077456788368, "compression/movement_sparsity/importance_threshold": -0.00020282158387489781, "compression/movement_sparsity/linear_layer_sparsity": 0.9032176603218648, "compression/movement_sparsity/model_sparsity": 0.8721893550672807, "compression_loss": 153.83242797851562, "distillation_loss": 7.214272975921631, "epoch": 1.65, "learning_rate": 4.174556213017752e-05, "loss": 160.1428, "step": 1953, "task_loss": 3.519016742706299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.435873553404259, "compression/movement_sparsity/importance_threshold": -0.00020135893387693456, "compression/movement_sparsity/linear_layer_sparsity": 0.9034533057226847, "compression/movement_sparsity/model_sparsity": 0.8724169053236523, "compression_loss": 153.8809356689453, "distillation_loss": 6.956715106964111, "epoch": 1.65, "learning_rate": 4.174133558748943e-05, "loss": 160.8754, "step": 1954, "task_loss": 3.084801435470581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.436337116277529, "compression/movement_sparsity/importance_threshold": -0.0001999033327809473, "compression/movement_sparsity/linear_layer_sparsity": 0.9036167502884694, "compression/movement_sparsity/model_sparsity": 0.8725747350657881, "compression_loss": 153.92904663085938, "distillation_loss": 5.978443145751953, "epoch": 1.65, "learning_rate": 4.173710904480136e-05, "loss": 160.0217, "step": 1955, "task_loss": 3.376854419708252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4367984397209939, "compression/movement_sparsity/importance_threshold": -0.00019845476356060432, "compression/movement_sparsity/linear_layer_sparsity": 0.9039561001752184, "compression/movement_sparsity/model_sparsity": 0.8729024272399649, "compression_loss": 153.97708129882812, "distillation_loss": 8.853143692016602, "epoch": 1.65, "learning_rate": 4.1732882502113276e-05, "loss": 160.8888, "step": 1956, "task_loss": 4.122030735015869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4372575291570024, "compression/movement_sparsity/importance_threshold": -0.00019701320918957214, "compression/movement_sparsity/linear_layer_sparsity": 0.9041730007845148, "compression/movement_sparsity/model_sparsity": 0.8731118766460677, "compression_loss": 154.0248260498047, "distillation_loss": 6.055191993713379, "epoch": 1.65, "learning_rate": 4.1728655959425196e-05, "loss": 160.1562, "step": 1957, "task_loss": 2.702770948410034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.437714390007903, "compression/movement_sparsity/importance_threshold": -0.00019557865264151642, "compression/movement_sparsity/linear_layer_sparsity": 0.9044220012530869, "compression/movement_sparsity/model_sparsity": 0.8733523231825293, "compression_loss": 154.07228088378906, "distillation_loss": 6.207364082336426, "epoch": 1.65, "learning_rate": 4.172442941673711e-05, "loss": 160.834, "step": 1958, "task_loss": 3.1016321182250977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4381690276960435, "compression/movement_sparsity/importance_threshold": -0.00019415107689010544, "compression/movement_sparsity/linear_layer_sparsity": 0.9046773573030089, "compression/movement_sparsity/model_sparsity": 0.8735989069665694, "compression_loss": 154.11944580078125, "distillation_loss": 7.013033390045166, "epoch": 1.66, "learning_rate": 4.172020287404903e-05, "loss": 160.567, "step": 1959, "task_loss": 2.974773406982422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4386214476437722, "compression/movement_sparsity/importance_threshold": -0.00019273046490900485, "compression/movement_sparsity/linear_layer_sparsity": 0.9049495860501356, "compression/movement_sparsity/model_sparsity": 0.8738617838187587, "compression_loss": 154.16651916503906, "distillation_loss": 7.334329128265381, "epoch": 1.66, "learning_rate": 4.171597633136095e-05, "loss": 160.7697, "step": 1960, "task_loss": 3.0747008323669434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4390716552734375, "compression/movement_sparsity/importance_threshold": -0.0001913167996718812, "compression/movement_sparsity/linear_layer_sparsity": 0.9051572931261848, "compression/movement_sparsity/model_sparsity": 0.8740623555177638, "compression_loss": 154.21322631835938, "distillation_loss": 5.816250801086426, "epoch": 1.66, "learning_rate": 4.171174978867287e-05, "loss": 160.3924, "step": 1961, "task_loss": 2.6259987354278564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4395196560073873, "compression/movement_sparsity/importance_threshold": -0.00018991006415240187, "compression/movement_sparsity/linear_layer_sparsity": 0.9053880853907769, "compression/movement_sparsity/model_sparsity": 0.8742852193580671, "compression_loss": 154.2598114013672, "distillation_loss": 6.2397942543029785, "epoch": 1.66, "learning_rate": 4.170752324598479e-05, "loss": 160.3997, "step": 1962, "task_loss": 2.1425631046295166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.43996545526797, "compression/movement_sparsity/importance_threshold": -0.0001885102413242334, "compression/movement_sparsity/linear_layer_sparsity": 0.9055441012001245, "compression/movement_sparsity/model_sparsity": 0.8744358755444029, "compression_loss": 154.30604553222656, "distillation_loss": 6.018580436706543, "epoch": 1.66, "learning_rate": 4.170329670329671e-05, "loss": 159.8971, "step": 1963, "task_loss": 2.2636215686798096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4404090584775335, "compression/movement_sparsity/importance_threshold": -0.0001871173141610432, "compression/movement_sparsity/linear_layer_sparsity": 0.9056863803683557, "compression/movement_sparsity/model_sparsity": 0.8745732669855034, "compression_loss": 154.35211181640625, "distillation_loss": 6.947786331176758, "epoch": 1.66, "learning_rate": 4.169907016060862e-05, "loss": 160.8568, "step": 1964, "task_loss": 3.9039864540100098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4408504710584262, "compression/movement_sparsity/importance_threshold": -0.00018573126563649692, "compression/movement_sparsity/linear_layer_sparsity": 0.9058814716750461, "compression/movement_sparsity/model_sparsity": 0.8747616563056378, "compression_loss": 154.39797973632812, "distillation_loss": 8.172338485717773, "epoch": 1.66, "learning_rate": 4.169484361792054e-05, "loss": 161.6974, "step": 1965, "task_loss": 4.233997344970703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4412896984329964, "compression/movement_sparsity/importance_threshold": -0.0001843520787242611, "compression/movement_sparsity/linear_layer_sparsity": 0.9061074465759132, "compression/movement_sparsity/model_sparsity": 0.8749798682734802, "compression_loss": 154.44369506835938, "distillation_loss": 6.936229705810547, "epoch": 1.66, "learning_rate": 4.169061707523246e-05, "loss": 161.4053, "step": 1966, "task_loss": 3.916936159133911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4417267460235919, "compression/movement_sparsity/importance_threshold": -0.000182979736398004, "compression/movement_sparsity/linear_layer_sparsity": 0.9062437636603264, "compression/movement_sparsity/model_sparsity": 0.8751115024466835, "compression_loss": 154.4892578125, "distillation_loss": 8.030532836914062, "epoch": 1.66, "learning_rate": 4.168639053254438e-05, "loss": 161.321, "step": 1967, "task_loss": 4.089968204498291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4421616192525615, "compression/movement_sparsity/importance_threshold": -0.0001816142216313904, "compression/movement_sparsity/linear_layer_sparsity": 0.9065616261969957, "compression/movement_sparsity/model_sparsity": 0.8754184454273585, "compression_loss": 154.53453063964844, "distillation_loss": 5.467465877532959, "epoch": 1.66, "learning_rate": 4.16821639898563e-05, "loss": 161.0242, "step": 1968, "task_loss": 2.6575448513031006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.442594323542253, "compression/movement_sparsity/importance_threshold": -0.0001802555173980886, "compression/movement_sparsity/linear_layer_sparsity": 0.9066594043716097, "compression/movement_sparsity/model_sparsity": 0.8755128646208737, "compression_loss": 154.5795440673828, "distillation_loss": 7.316758155822754, "epoch": 1.66, "learning_rate": 4.167793744716822e-05, "loss": 160.6143, "step": 1969, "task_loss": 3.519580125808716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4430248643150148, "compression/movement_sparsity/importance_threshold": -0.00017890360667176426, "compression/movement_sparsity/linear_layer_sparsity": 0.9069811422627606, "compression/movement_sparsity/model_sparsity": 0.875823549825682, "compression_loss": 154.6243133544922, "distillation_loss": 6.348191261291504, "epoch": 1.66, "learning_rate": 4.167371090448014e-05, "loss": 160.7449, "step": 1970, "task_loss": 3.5273609161376953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4434532469931947, "compression/movement_sparsity/importance_threshold": -0.00017755847242608477, "compression/movement_sparsity/linear_layer_sparsity": 0.9072157860334991, "compression/movement_sparsity/model_sparsity": 0.8760501328610469, "compression_loss": 154.6687469482422, "distillation_loss": 5.233705043792725, "epoch": 1.67, "learning_rate": 4.166948436179205e-05, "loss": 160.5417, "step": 1971, "task_loss": 2.6867949962615967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4438794769991412, "compression/movement_sparsity/importance_threshold": -0.00017622009763471665, "compression/movement_sparsity/linear_layer_sparsity": 0.9072905982612465, "compression/movement_sparsity/model_sparsity": 0.8761223750586219, "compression_loss": 154.7130126953125, "distillation_loss": 5.704310894012451, "epoch": 1.67, "learning_rate": 4.166525781910398e-05, "loss": 161.2749, "step": 1972, "task_loss": 2.7655320167541504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4443035597552023, "compression/movement_sparsity/importance_threshold": -0.00017488846527132645, "compression/movement_sparsity/linear_layer_sparsity": 0.9074928321443507, "compression/movement_sparsity/model_sparsity": 0.8763176615856973, "compression_loss": 154.75701904296875, "distillation_loss": 5.427280426025391, "epoch": 1.67, "learning_rate": 4.16610312764159e-05, "loss": 161.2508, "step": 1973, "task_loss": 4.193160057067871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4447255006837267, "compression/movement_sparsity/importance_threshold": -0.0001735635583095807, "compression/movement_sparsity/linear_layer_sparsity": 0.9076441498316499, "compression/movement_sparsity/model_sparsity": 0.8764637810449301, "compression_loss": 154.80084228515625, "distillation_loss": 8.274840354919434, "epoch": 1.67, "learning_rate": 4.165680473372781e-05, "loss": 162.25, "step": 1974, "task_loss": 3.675563097000122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4451453052070622, "compression/movement_sparsity/importance_threshold": -0.0001722453597231468, "compression/movement_sparsity/linear_layer_sparsity": 0.9078646038429017, "compression/movement_sparsity/model_sparsity": 0.8766766617826995, "compression_loss": 154.8443603515625, "distillation_loss": 5.241974353790283, "epoch": 1.67, "learning_rate": 4.165257819103973e-05, "loss": 160.6418, "step": 1975, "task_loss": 2.6789393424987793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.445562978747557, "compression/movement_sparsity/importance_threshold": -0.00017093385248569126, "compression/movement_sparsity/linear_layer_sparsity": 0.908017221264473, "compression/movement_sparsity/model_sparsity": 0.8768240363263339, "compression_loss": 154.88751220703125, "distillation_loss": 7.543951988220215, "epoch": 1.67, "learning_rate": 4.164835164835165e-05, "loss": 161.3425, "step": 1976, "task_loss": 3.8015730381011963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.445978526727559, "compression/movement_sparsity/importance_threshold": -0.00016962901957088064, "compression/movement_sparsity/linear_layer_sparsity": 0.9082465945531165, "compression/movement_sparsity/model_sparsity": 0.8770455299368777, "compression_loss": 154.93064880371094, "distillation_loss": 6.970600128173828, "epoch": 1.67, "learning_rate": 4.164412510566357e-05, "loss": 160.9763, "step": 1977, "task_loss": 3.432961940765381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.446391954569417, "compression/movement_sparsity/importance_threshold": -0.00016833084395238233, "compression/movement_sparsity/linear_layer_sparsity": 0.908361388514947, "compression/movement_sparsity/model_sparsity": 0.8771563803729717, "compression_loss": 154.97344970703125, "distillation_loss": 6.539155006408691, "epoch": 1.67, "learning_rate": 4.163989856297549e-05, "loss": 161.7528, "step": 1978, "task_loss": 3.08008074760437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4468032676954792, "compression/movement_sparsity/importance_threshold": -0.00016703930860386112, "compression/movement_sparsity/linear_layer_sparsity": 0.9086290145333663, "compression/movement_sparsity/model_sparsity": 0.8774148126143444, "compression_loss": 155.0161895751953, "distillation_loss": 6.985867977142334, "epoch": 1.67, "learning_rate": 4.163567202028741e-05, "loss": 161.6048, "step": 1979, "task_loss": 3.723555564880371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.447212471528093, "compression/movement_sparsity/importance_threshold": -0.00016575439649898615, "compression/movement_sparsity/linear_layer_sparsity": 0.9087660947645082, "compression/movement_sparsity/model_sparsity": 0.8775471837178385, "compression_loss": 155.05844116210938, "distillation_loss": 7.06993293762207, "epoch": 1.67, "learning_rate": 4.163144547759932e-05, "loss": 162.1096, "step": 1980, "task_loss": 3.500683307647705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4476195714896074, "compression/movement_sparsity/importance_threshold": -0.00016447609061142222, "compression/movement_sparsity/linear_layer_sparsity": 0.9088791597220315, "compression/movement_sparsity/model_sparsity": 0.8776563645462423, "compression_loss": 155.1007080078125, "distillation_loss": 6.764288425445557, "epoch": 1.67, "learning_rate": 4.162721893491124e-05, "loss": 162.03, "step": 1981, "task_loss": 3.8429129123687744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4480245730023702, "compression/movement_sparsity/importance_threshold": -0.0001632043739148376, "compression/movement_sparsity/linear_layer_sparsity": 0.9090550531188281, "compression/movement_sparsity/model_sparsity": 0.8778262154637475, "compression_loss": 155.1427459716797, "distillation_loss": 7.9922990798950195, "epoch": 1.67, "learning_rate": 4.162299239222316e-05, "loss": 161.8125, "step": 1982, "task_loss": 3.641343593597412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.44842748148873, "compression/movement_sparsity/importance_threshold": -0.00016193922938289706, "compression/movement_sparsity/linear_layer_sparsity": 0.90920025370813, "compression/movement_sparsity/model_sparsity": 0.8779664279661177, "compression_loss": 155.18458557128906, "distillation_loss": 7.973152160644531, "epoch": 1.68, "learning_rate": 4.161876584953508e-05, "loss": 161.307, "step": 1983, "task_loss": 2.9139230251312256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4488283023710342, "compression/movement_sparsity/importance_threshold": -0.00016068063998926976, "compression/movement_sparsity/linear_layer_sparsity": 0.9093686468034836, "compression/movement_sparsity/model_sparsity": 0.8781290362406081, "compression_loss": 155.22616577148438, "distillation_loss": 5.324052810668945, "epoch": 1.68, "learning_rate": 4.1614539306847e-05, "loss": 161.4149, "step": 1984, "task_loss": 3.243025541305542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.449227041071632, "compression/movement_sparsity/importance_threshold": -0.0001594285887076205, "compression/movement_sparsity/linear_layer_sparsity": 0.9095258550295948, "compression/movement_sparsity/model_sparsity": 0.8782808438805234, "compression_loss": 155.26771545410156, "distillation_loss": 6.979626655578613, "epoch": 1.68, "learning_rate": 4.161031276415892e-05, "loss": 161.7736, "step": 1985, "task_loss": 3.892354965209961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4496237030128707, "compression/movement_sparsity/importance_threshold": -0.00015818305851161667, "compression/movement_sparsity/linear_layer_sparsity": 0.9095826498400444, "compression/movement_sparsity/model_sparsity": 0.8783356876145128, "compression_loss": 155.30886840820312, "distillation_loss": 4.373015403747559, "epoch": 1.68, "learning_rate": 4.160608622147084e-05, "loss": 161.0504, "step": 1986, "task_loss": 2.4976749420166016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.450018293617099, "compression/movement_sparsity/importance_threshold": -0.00015694403237492566, "compression/movement_sparsity/linear_layer_sparsity": 0.909729794068671, "compression/movement_sparsity/model_sparsity": 0.8784777769862174, "compression_loss": 155.34994506835938, "distillation_loss": 5.695289134979248, "epoch": 1.68, "learning_rate": 4.1601859678782754e-05, "loss": 161.8875, "step": 1987, "task_loss": 3.5976452827453613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4504108183066649, "compression/movement_sparsity/importance_threshold": -0.00015571149327121316, "compression/movement_sparsity/linear_layer_sparsity": 0.9099929962208974, "compression/movement_sparsity/model_sparsity": 0.8787319373348104, "compression_loss": 155.3907012939453, "distillation_loss": 6.410995006561279, "epoch": 1.68, "learning_rate": 4.1597633136094674e-05, "loss": 161.9052, "step": 1988, "task_loss": 3.247265338897705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.450801282503917, "compression/movement_sparsity/importance_threshold": -0.00015448542417414654, "compression/movement_sparsity/linear_layer_sparsity": 0.9102200442968519, "compression/movement_sparsity/model_sparsity": 0.8789511856108742, "compression_loss": 155.43121337890625, "distillation_loss": 7.766597747802734, "epoch": 1.68, "learning_rate": 4.15934065934066e-05, "loss": 161.5383, "step": 1989, "task_loss": 3.7913436889648438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4511896916312026, "compression/movement_sparsity/importance_threshold": -0.00015326580805739235, "compression/movement_sparsity/linear_layer_sparsity": 0.9104185220671508, "compression/movement_sparsity/model_sparsity": 0.8791428450591743, "compression_loss": 155.47146606445312, "distillation_loss": 6.82233190536499, "epoch": 1.68, "learning_rate": 4.1589180050718514e-05, "loss": 161.8416, "step": 1990, "task_loss": 3.6756303310394287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4515760511108708, "compression/movement_sparsity/importance_threshold": -0.00015205262789461625, "compression/movement_sparsity/linear_layer_sparsity": 0.9106232957979615, "compression/movement_sparsity/model_sparsity": 0.8793405841823739, "compression_loss": 155.51161193847656, "distillation_loss": 7.415669918060303, "epoch": 1.68, "learning_rate": 4.158495350803043e-05, "loss": 161.8125, "step": 1991, "task_loss": 3.576765537261963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4519603663652696, "compression/movement_sparsity/importance_threshold": -0.0001508458666594865, "compression/movement_sparsity/linear_layer_sparsity": 0.9108281529979456, "compression/movement_sparsity/model_sparsity": 0.8795384039073242, "compression_loss": 155.5514678955078, "distillation_loss": 7.50686502456665, "epoch": 1.68, "learning_rate": 4.158072696534235e-05, "loss": 162.2207, "step": 1992, "task_loss": 3.2191779613494873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4523426428167467, "compression/movement_sparsity/importance_threshold": -0.00014964550732566965, "compression/movement_sparsity/linear_layer_sparsity": 0.9109674153517648, "compression/movement_sparsity/model_sparsity": 0.8796728821708686, "compression_loss": 155.59109497070312, "distillation_loss": 7.088069915771484, "epoch": 1.68, "learning_rate": 4.1576500422654266e-05, "loss": 162.4279, "step": 1993, "task_loss": 3.2839553356170654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.452722885887651, "compression/movement_sparsity/importance_threshold": -0.00014845153286683135, "compression/movement_sparsity/linear_layer_sparsity": 0.9111283796906814, "compression/movement_sparsity/model_sparsity": 0.8798283168895591, "compression_loss": 155.63050842285156, "distillation_loss": 6.707387447357178, "epoch": 1.69, "learning_rate": 4.157227387996619e-05, "loss": 162.0499, "step": 1994, "task_loss": 3.114069700241089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4531011010003303, "compression/movement_sparsity/importance_threshold": -0.00014726392625663987, "compression/movement_sparsity/linear_layer_sparsity": 0.9113064790584906, "compression/movement_sparsity/model_sparsity": 0.8800002979961864, "compression_loss": 155.6697235107422, "distillation_loss": 6.702914237976074, "epoch": 1.69, "learning_rate": 4.156804733727811e-05, "loss": 161.6484, "step": 1995, "task_loss": 2.559359073638916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4534772935771327, "compression/movement_sparsity/importance_threshold": -0.00014608267046876, "compression/movement_sparsity/linear_layer_sparsity": 0.9115579955264339, "compression/movement_sparsity/model_sparsity": 0.8802431740997007, "compression_loss": 155.70875549316406, "distillation_loss": 5.769513130187988, "epoch": 1.69, "learning_rate": 4.1563820794590025e-05, "loss": 161.2939, "step": 1996, "task_loss": 2.9781363010406494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4538514690404065, "compression/movement_sparsity/importance_threshold": -0.00014490774847686003, "compression/movement_sparsity/linear_layer_sparsity": 0.9116636913483581, "compression/movement_sparsity/model_sparsity": 0.8803452389449834, "compression_loss": 155.74737548828125, "distillation_loss": 6.210541248321533, "epoch": 1.69, "learning_rate": 4.1559594251901945e-05, "loss": 161.9734, "step": 1997, "task_loss": 2.5509679317474365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4542236328125, "compression/movement_sparsity/importance_threshold": -0.00014373914325460646, "compression/movement_sparsity/linear_layer_sparsity": 0.9118287814492767, "compression/movement_sparsity/model_sparsity": 0.8805046576930589, "compression_loss": 155.7859344482422, "distillation_loss": 5.039443016052246, "epoch": 1.69, "learning_rate": 4.1555367709213865e-05, "loss": 162.1594, "step": 1998, "task_loss": 2.883822441101074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4545937903157613, "compression/movement_sparsity/importance_threshold": -0.00014257683777566497, "compression/movement_sparsity/linear_layer_sparsity": 0.911973838948567, "compression/movement_sparsity/model_sparsity": 0.8806447320209995, "compression_loss": 155.82421875, "distillation_loss": 6.975203514099121, "epoch": 1.69, "learning_rate": 4.1551141166525784e-05, "loss": 162.5386, "step": 1999, "task_loss": 3.747464895248413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4549619469725386, "compression/movement_sparsity/importance_threshold": -0.0001414208150137047, "compression/movement_sparsity/linear_layer_sparsity": 0.9121312975821986, "compression/movement_sparsity/model_sparsity": 0.8807967814661664, "compression_loss": 155.86231994628906, "distillation_loss": 5.985924243927002, "epoch": 1.69, "learning_rate": 4.1546914623837704e-05, "loss": 162.0751, "step": 2000, "task_loss": 2.508779525756836 }, { "epoch": 1.69, "eval_accuracy": 0.35116831683168315, "eval_loss": 162.501953125, "eval_runtime": 377.4016, "eval_samples_per_second": 66.905, "eval_steps_per_second": 0.525, "step": 2000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.45532810820518, "compression/movement_sparsity/importance_threshold": -0.0001402710579423887, "compression/movement_sparsity/linear_layer_sparsity": 0.9123718557400844, "compression/movement_sparsity/model_sparsity": 0.8810290757112854, "compression_loss": 155.9002227783203, "distillation_loss": 8.141429901123047, "epoch": 1.69, "learning_rate": 4.1542688081149624e-05, "loss": 163.2476, "step": 2001, "task_loss": 3.9372689723968506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.455692279436034, "compression/movement_sparsity/importance_threshold": -0.000139127549535387, "compression/movement_sparsity/linear_layer_sparsity": 0.9125571692293134, "compression/movement_sparsity/model_sparsity": 0.8812080231120684, "compression_loss": 155.93775939941406, "distillation_loss": 7.465813636779785, "epoch": 1.69, "learning_rate": 4.1538461538461544e-05, "loss": 161.7226, "step": 2002, "task_loss": 3.714134693145752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4560544660874486, "compression/movement_sparsity/importance_threshold": -0.00013799027276636434, "compression/movement_sparsity/linear_layer_sparsity": 0.9126854971214106, "compression/movement_sparsity/model_sparsity": 0.8813319425462892, "compression_loss": 155.9752655029297, "distillation_loss": 6.34322452545166, "epoch": 1.69, "learning_rate": 4.153423499577346e-05, "loss": 162.0157, "step": 2003, "task_loss": 3.737729787826538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4564146735817722, "compression/movement_sparsity/importance_threshold": -0.00013685921060898817, "compression/movement_sparsity/linear_layer_sparsity": 0.9129124021073534, "compression/movement_sparsity/model_sparsity": 0.8815510526479235, "compression_loss": 156.012451171875, "distillation_loss": 6.389775276184082, "epoch": 1.69, "learning_rate": 4.1530008453085376e-05, "loss": 161.6535, "step": 2004, "task_loss": 3.454962730407715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4567729073413525, "compression/movement_sparsity/importance_threshold": -0.00013573434603692588, "compression/movement_sparsity/linear_layer_sparsity": 0.9131784899081477, "compression/movement_sparsity/model_sparsity": 0.8818079995141787, "compression_loss": 156.04942321777344, "distillation_loss": 9.083995819091797, "epoch": 1.69, "learning_rate": 4.1525781910397296e-05, "loss": 163.7404, "step": 2005, "task_loss": 4.205441951751709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4571291727885378, "compression/movement_sparsity/importance_threshold": -0.00013461566202384312, "compression/movement_sparsity/linear_layer_sparsity": 0.9133159517126539, "compression/movement_sparsity/model_sparsity": 0.8819407390828182, "compression_loss": 156.086181640625, "distillation_loss": 6.303823471069336, "epoch": 1.7, "learning_rate": 4.1521555367709216e-05, "loss": 162.5442, "step": 2006, "task_loss": 3.544614315032959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.457483475345677, "compression/movement_sparsity/importance_threshold": -0.00013350314154340643, "compression/movement_sparsity/linear_layer_sparsity": 0.9133968810383984, "compression/movement_sparsity/model_sparsity": 0.8820188882372557, "compression_loss": 156.1227569580078, "distillation_loss": 7.097990036010742, "epoch": 1.7, "learning_rate": 4.1517328825021136e-05, "loss": 162.3194, "step": 2007, "task_loss": 2.5408976078033447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4578358204351174, "compression/movement_sparsity/importance_threshold": -0.0001323967675692832, "compression/movement_sparsity/linear_layer_sparsity": 0.9134877074232809, "compression/movement_sparsity/model_sparsity": 0.8821065944564027, "compression_loss": 156.1591339111328, "distillation_loss": 6.6025390625, "epoch": 1.7, "learning_rate": 4.1513102282333055e-05, "loss": 162.0804, "step": 2008, "task_loss": 4.805090427398682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4581862134792076, "compression/movement_sparsity/importance_threshold": -0.00013129652307514086, "compression/movement_sparsity/linear_layer_sparsity": 0.9137236509282917, "compression/movement_sparsity/model_sparsity": 0.8823344325761693, "compression_loss": 156.19512939453125, "distillation_loss": 5.5980224609375, "epoch": 1.7, "learning_rate": 4.150887573964497e-05, "loss": 162.647, "step": 2009, "task_loss": 3.799257278442383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.458534659900296, "compression/movement_sparsity/importance_threshold": -0.00013020239103464417, "compression/movement_sparsity/linear_layer_sparsity": 0.9138732634596188, "compression/movement_sparsity/model_sparsity": 0.8824789054567834, "compression_loss": 156.2310028076172, "distillation_loss": 5.673425674438477, "epoch": 1.7, "learning_rate": 4.150464919695689e-05, "loss": 162.1084, "step": 2010, "task_loss": 3.0742673873901367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4588811651207303, "compression/movement_sparsity/importance_threshold": -0.00012911435442146228, "compression/movement_sparsity/linear_layer_sparsity": 0.9140479644396519, "compression/movement_sparsity/model_sparsity": 0.8826476049207093, "compression_loss": 156.26663208007812, "distillation_loss": 7.06057071685791, "epoch": 1.7, "learning_rate": 4.1500422654268815e-05, "loss": 162.7097, "step": 2011, "task_loss": 2.999030113220215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.459225734562859, "compression/movement_sparsity/importance_threshold": -0.00012803239620926, "compression/movement_sparsity/linear_layer_sparsity": 0.9140976166736876, "compression/movement_sparsity/model_sparsity": 0.8826955514477577, "compression_loss": 156.3020782470703, "distillation_loss": 7.243545055389404, "epoch": 1.7, "learning_rate": 4.149619611158073e-05, "loss": 163.2383, "step": 2012, "task_loss": 2.957284927368164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4595683736490304, "compression/movement_sparsity/importance_threshold": -0.00012695649937170383, "compression/movement_sparsity/linear_layer_sparsity": 0.9143062061381417, "compression/movement_sparsity/model_sparsity": 0.8828969752224116, "compression_loss": 156.33729553222656, "distillation_loss": 5.708771705627441, "epoch": 1.7, "learning_rate": 4.149196956889265e-05, "loss": 161.5347, "step": 2013, "task_loss": 2.653768301010132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4599090878015926, "compression/movement_sparsity/importance_threshold": -0.00012588664688246206, "compression/movement_sparsity/linear_layer_sparsity": 0.9144212862799955, "compression/movement_sparsity/model_sparsity": 0.8830081020073647, "compression_loss": 156.3724365234375, "distillation_loss": 6.637938499450684, "epoch": 1.7, "learning_rate": 4.148774302620457e-05, "loss": 162.8613, "step": 2014, "task_loss": 2.731832265853882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4602478824428933, "compression/movement_sparsity/importance_threshold": -0.00012482282171520122, "compression/movement_sparsity/linear_layer_sparsity": 0.9145664510967945, "compression/movement_sparsity/model_sparsity": 0.8831482799661274, "compression_loss": 156.4072723388672, "distillation_loss": 7.97456693649292, "epoch": 1.7, "learning_rate": 4.148351648351649e-05, "loss": 162.577, "step": 2015, "task_loss": 3.6928513050079346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4605847629952815, "compression/movement_sparsity/importance_threshold": -0.00012376500684358697, "compression/movement_sparsity/linear_layer_sparsity": 0.9146595669218629, "compression/movement_sparsity/model_sparsity": 0.883238196976147, "compression_loss": 156.44183349609375, "distillation_loss": 6.399290084838867, "epoch": 1.7, "learning_rate": 4.14792899408284e-05, "loss": 163.7199, "step": 2016, "task_loss": 4.422609806060791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4609197348811052, "compression/movement_sparsity/importance_threshold": -0.0001227131852412867, "compression/movement_sparsity/linear_layer_sparsity": 0.9148900253097613, "compression/movement_sparsity/model_sparsity": 0.8834607384094482, "compression_loss": 156.476318359375, "distillation_loss": 7.382699012756348, "epoch": 1.7, "learning_rate": 4.1475063398140326e-05, "loss": 162.7684, "step": 2017, "task_loss": 4.515773773193359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4612528035227121, "compression/movement_sparsity/importance_threshold": -0.00012166733988196697, "compression/movement_sparsity/linear_layer_sparsity": 0.9151391927166802, "compression/movement_sparsity/model_sparsity": 0.8837013461494109, "compression_loss": 156.5106658935547, "distillation_loss": 7.100282669067383, "epoch": 1.71, "learning_rate": 4.1470836855452246e-05, "loss": 163.0127, "step": 2018, "task_loss": 3.40665602684021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4615839743424508, "compression/movement_sparsity/importance_threshold": -0.00012062745373929429, "compression/movement_sparsity/linear_layer_sparsity": 0.9153970886143087, "compression/movement_sparsity/model_sparsity": 0.8839503825295751, "compression_loss": 156.54478454589844, "distillation_loss": 6.993110656738281, "epoch": 1.71, "learning_rate": 4.146661031276416e-05, "loss": 163.0793, "step": 2019, "task_loss": 3.9153037071228027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4619132527626695, "compression/movement_sparsity/importance_threshold": -0.00011959350978693519, "compression/movement_sparsity/linear_layer_sparsity": 0.9155058847198183, "compression/movement_sparsity/model_sparsity": 0.8840554411541646, "compression_loss": 156.5786590576172, "distillation_loss": 7.774040699005127, "epoch": 1.71, "learning_rate": 4.146238377007608e-05, "loss": 163.3161, "step": 2020, "task_loss": 3.7484071254730225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4622406442057163, "compression/movement_sparsity/importance_threshold": -0.00011856549099855708, "compression/movement_sparsity/linear_layer_sparsity": 0.9155900693433275, "compression/movement_sparsity/model_sparsity": 0.8841367337768741, "compression_loss": 156.61244201660156, "distillation_loss": 7.001641273498535, "epoch": 1.71, "learning_rate": 4.1458157227388e-05, "loss": 162.8766, "step": 2021, "task_loss": 3.392859935760498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4625661540939394, "compression/movement_sparsity/importance_threshold": -0.00011754338034782735, "compression/movement_sparsity/linear_layer_sparsity": 0.9157533708191007, "compression/movement_sparsity/model_sparsity": 0.8842944253445802, "compression_loss": 156.64599609375, "distillation_loss": 6.925978660583496, "epoch": 1.71, "learning_rate": 4.145393068469992e-05, "loss": 162.8029, "step": 2022, "task_loss": 3.8084402084350586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.462889787849687, "compression/movement_sparsity/importance_threshold": -0.00011652716080841081, "compression/movement_sparsity/linear_layer_sparsity": 0.9159962899621784, "compression/movement_sparsity/model_sparsity": 0.8845289994677866, "compression_loss": 156.6792755126953, "distillation_loss": 5.965188026428223, "epoch": 1.71, "learning_rate": 4.144970414201184e-05, "loss": 162.1668, "step": 2023, "task_loss": 3.324993133544922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4632115508953074, "compression/movement_sparsity/importance_threshold": -0.00011551681535397571, "compression/movement_sparsity/linear_layer_sparsity": 0.9161209332864761, "compression/movement_sparsity/model_sparsity": 0.884649360910447, "compression_loss": 156.71253967285156, "distillation_loss": 6.55002498626709, "epoch": 1.71, "learning_rate": 4.144547759932376e-05, "loss": 162.72, "step": 2024, "task_loss": 3.754878282546997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4635314486531485, "compression/movement_sparsity/importance_threshold": -0.00011451232695818773, "compression/movement_sparsity/linear_layer_sparsity": 0.9162370150584113, "compression/movement_sparsity/model_sparsity": 0.8847614549164068, "compression_loss": 156.7454833984375, "distillation_loss": 7.326180458068848, "epoch": 1.71, "learning_rate": 4.144125105663567e-05, "loss": 163.1221, "step": 2025, "task_loss": 2.8257737159729004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4638494865455587, "compression/movement_sparsity/importance_threshold": -0.00011351367859471426, "compression/movement_sparsity/linear_layer_sparsity": 0.916367322362336, "compression/movement_sparsity/model_sparsity": 0.8848872857635696, "compression_loss": 156.77821350097656, "distillation_loss": 5.909095287322998, "epoch": 1.71, "learning_rate": 4.143702451394759e-05, "loss": 162.6844, "step": 2026, "task_loss": 3.5221149921417236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4641656699948862, "compression/movement_sparsity/importance_threshold": -0.00011252085323722183, "compression/movement_sparsity/linear_layer_sparsity": 0.9165036990675873, "compression/movement_sparsity/model_sparsity": 0.8850189775094517, "compression_loss": 156.8107452392578, "distillation_loss": 7.84874153137207, "epoch": 1.71, "learning_rate": 4.143279797125951e-05, "loss": 163.5946, "step": 2027, "task_loss": 4.277594566345215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4644800044234794, "compression/movement_sparsity/importance_threshold": -0.00011153383385937785, "compression/movement_sparsity/linear_layer_sparsity": 0.9166309537845972, "compression/movement_sparsity/model_sparsity": 0.8851418606354511, "compression_loss": 156.8430633544922, "distillation_loss": 8.344863891601562, "epoch": 1.71, "learning_rate": 4.1428571428571437e-05, "loss": 163.4625, "step": 2028, "task_loss": 3.5164167881011963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4647924952536862, "compression/movement_sparsity/importance_threshold": -0.0001105526034348471, "compression/movement_sparsity/linear_layer_sparsity": 0.9167332392945777, "compression/movement_sparsity/model_sparsity": 0.8852406323234967, "compression_loss": 156.87533569335938, "distillation_loss": 7.735446453094482, "epoch": 1.71, "learning_rate": 4.142434488588335e-05, "loss": 162.9165, "step": 2029, "task_loss": 3.5454776287078857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4651031479078545, "compression/movement_sparsity/importance_threshold": -0.00010957714493729873, "compression/movement_sparsity/linear_layer_sparsity": 0.9168824583283729, "compression/movement_sparsity/model_sparsity": 0.8853847252244296, "compression_loss": 156.9072265625, "distillation_loss": 7.075920104980469, "epoch": 1.72, "learning_rate": 4.142011834319527e-05, "loss": 163.4122, "step": 2030, "task_loss": 3.234644889831543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4654119678083333, "compression/movement_sparsity/importance_threshold": -0.00010860744134039754, "compression/movement_sparsity/linear_layer_sparsity": 0.9170195504836824, "compression/movement_sparsity/model_sparsity": 0.8855171078424594, "compression_loss": 156.939208984375, "distillation_loss": 5.532312393188477, "epoch": 1.72, "learning_rate": 4.141589180050719e-05, "loss": 163.3148, "step": 2031, "task_loss": 2.670731544494629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4657189603774703, "compression/movement_sparsity/importance_threshold": -0.00010764347561781178, "compression/movement_sparsity/linear_layer_sparsity": 0.917132901621229, "compression/movement_sparsity/model_sparsity": 0.8856265650197223, "compression_loss": 156.97080993652344, "distillation_loss": 6.753641128540039, "epoch": 1.72, "learning_rate": 4.14116652578191e-05, "loss": 162.9724, "step": 2032, "task_loss": 2.9162168502807617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4660241310376136, "compression/movement_sparsity/importance_threshold": -0.00010668523074320625, "compression/movement_sparsity/linear_layer_sparsity": 0.9173215896498988, "compression/movement_sparsity/model_sparsity": 0.8858087710341351, "compression_loss": 157.00216674804688, "distillation_loss": 8.310647964477539, "epoch": 1.72, "learning_rate": 4.140743871513102e-05, "loss": 163.1907, "step": 2033, "task_loss": 4.008687973022461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4663274852111117, "compression/movement_sparsity/importance_threshold": -0.0001057326896902501, "compression/movement_sparsity/linear_layer_sparsity": 0.9174730981238801, "compression/movement_sparsity/model_sparsity": 0.8859550747259405, "compression_loss": 157.03335571289062, "distillation_loss": 6.653524398803711, "epoch": 1.72, "learning_rate": 4.140321217244295e-05, "loss": 163.1501, "step": 2034, "task_loss": 2.6538071632385254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4666290283203125, "compression/movement_sparsity/importance_threshold": -0.00010478583543260811, "compression/movement_sparsity/linear_layer_sparsity": 0.9175802963909265, "compression/movement_sparsity/model_sparsity": 0.8860585904027335, "compression_loss": 157.06422424316406, "distillation_loss": 4.820926666259766, "epoch": 1.72, "learning_rate": 4.139898562975486e-05, "loss": 162.9441, "step": 2035, "task_loss": 2.164738893508911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4669287657875645, "compression/movement_sparsity/importance_threshold": -0.00010384465094394769, "compression/movement_sparsity/linear_layer_sparsity": 0.9177019944458181, "compression/movement_sparsity/model_sparsity": 0.8861761077550525, "compression_loss": 157.09498596191406, "distillation_loss": 6.006706714630127, "epoch": 1.72, "learning_rate": 4.139475908706678e-05, "loss": 163.1836, "step": 2036, "task_loss": 3.0218958854675293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4672267030352155, "compression/movement_sparsity/importance_threshold": -0.00010290911919793623, "compression/movement_sparsity/linear_layer_sparsity": 0.9178168361043191, "compression/movement_sparsity/model_sparsity": 0.8862870042492897, "compression_loss": 157.12550354003906, "distillation_loss": 7.793644905090332, "epoch": 1.72, "learning_rate": 4.13905325443787e-05, "loss": 164.2727, "step": 2037, "task_loss": 4.520610332489014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4675228454856142, "compression/movement_sparsity/importance_threshold": -0.00010197922316823854, "compression/movement_sparsity/linear_layer_sparsity": 0.917902535097118, "compression/movement_sparsity/model_sparsity": 0.8863697592180451, "compression_loss": 157.15582275390625, "distillation_loss": 7.076512336730957, "epoch": 1.72, "learning_rate": 4.1386306001690614e-05, "loss": 163.6283, "step": 2038, "task_loss": 3.4475290775299072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4678171985611084, "compression/movement_sparsity/importance_threshold": -0.00010105494582852374, "compression/movement_sparsity/linear_layer_sparsity": 0.9180419643892842, "compression/movement_sparsity/model_sparsity": 0.8865043986850907, "compression_loss": 157.18597412109375, "distillation_loss": 6.465146541595459, "epoch": 1.72, "learning_rate": 4.138207945900254e-05, "loss": 163.2867, "step": 2039, "task_loss": 2.3278679847717285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4681097676840464, "compression/movement_sparsity/importance_threshold": -0.00010013627015245576, "compression/movement_sparsity/linear_layer_sparsity": 0.918228494143612, "compression/movement_sparsity/model_sparsity": 0.8866845205685246, "compression_loss": 157.2158660888672, "distillation_loss": 6.347691535949707, "epoch": 1.72, "learning_rate": 4.137785291631446e-05, "loss": 163.8396, "step": 2040, "task_loss": 3.275932788848877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4684005582767765, "compression/movement_sparsity/importance_threshold": -9.922317911370462e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9183386973009027, "compression/movement_sparsity/model_sparsity": 0.8867909379083379, "compression_loss": 157.2456817626953, "distillation_loss": 5.799373626708984, "epoch": 1.72, "learning_rate": 4.137362637362637e-05, "loss": 163.0939, "step": 2041, "task_loss": 2.822983980178833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4686895757616467, "compression/movement_sparsity/importance_threshold": -9.83156556859351e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9185223891033332, "compression/movement_sparsity/model_sparsity": 0.8869683193322527, "compression_loss": 157.27516174316406, "distillation_loss": 8.249189376831055, "epoch": 1.73, "learning_rate": 4.136939983093829e-05, "loss": 164.4134, "step": 2042, "task_loss": 4.526817798614502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4689768255610054, "compression/movement_sparsity/importance_threshold": -9.741368284281373e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9187148332448083, "compression/movement_sparsity/model_sparsity": 0.8871541524254408, "compression_loss": 157.30471801757812, "distillation_loss": 7.420807838439941, "epoch": 1.73, "learning_rate": 4.136517328825021e-05, "loss": 163.9933, "step": 2043, "task_loss": 3.103855609893799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4692623130972007, "compression/movement_sparsity/importance_threshold": -9.651724355800792e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9188981315497068, "compression/movement_sparsity/model_sparsity": 0.8873311538696745, "compression_loss": 157.33387756347656, "distillation_loss": 5.574035167694092, "epoch": 1.73, "learning_rate": 4.136094674556213e-05, "loss": 164.164, "step": 2044, "task_loss": 1.412148356437683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4695460437925807, "compression/movement_sparsity/importance_threshold": -9.562632080518333e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91904191316306, "compression/movement_sparsity/model_sparsity": 0.8874699961422851, "compression_loss": 157.36293029785156, "distillation_loss": 6.073769569396973, "epoch": 1.73, "learning_rate": 4.135672020287405e-05, "loss": 163.5702, "step": 2045, "task_loss": 3.0423617362976074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4698280230694938, "compression/movement_sparsity/importance_threshold": -9.474089755800823e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9191824752511516, "compression/movement_sparsity/model_sparsity": 0.8876057294902311, "compression_loss": 157.3916015625, "distillation_loss": 6.953325271606445, "epoch": 1.73, "learning_rate": 4.135249366018597e-05, "loss": 163.5973, "step": 2046, "task_loss": 2.9939026832580566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4701082563502879, "compression/movement_sparsity/importance_threshold": -9.386095679014914e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9192988789756129, "compression/movement_sparsity/model_sparsity": 0.8877181343886574, "compression_loss": 157.42037963867188, "distillation_loss": 5.951033592224121, "epoch": 1.73, "learning_rate": 4.134826711749789e-05, "loss": 164.2216, "step": 2047, "task_loss": 2.8542072772979736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4703867490573115, "compression/movement_sparsity/importance_threshold": -9.298648147527174e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9194350410458468, "compression/movement_sparsity/model_sparsity": 0.8878496188728953, "compression_loss": 157.4488067626953, "distillation_loss": 7.951514720916748, "epoch": 1.73, "learning_rate": 4.1344040574809804e-05, "loss": 163.5652, "step": 2048, "task_loss": 3.86883807182312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4706635066129126, "compression/movement_sparsity/importance_threshold": -9.211745458704342e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9196636631119292, "compression/movement_sparsity/model_sparsity": 0.888070387067684, "compression_loss": 157.4771270751953, "distillation_loss": 6.9326043128967285, "epoch": 1.73, "learning_rate": 4.1339814032121724e-05, "loss": 163.7969, "step": 2049, "task_loss": 3.4746665954589844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4709385344394397, "compression/movement_sparsity/importance_threshold": -9.125385909913072e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9198505625154537, "compression/movement_sparsity/model_sparsity": 0.8882508659017276, "compression_loss": 157.5052490234375, "distillation_loss": 8.950945854187012, "epoch": 1.73, "learning_rate": 4.1335587489433644e-05, "loss": 164.6084, "step": 2050, "task_loss": 3.856461524963379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4712118379592405, "compression/movement_sparsity/importance_threshold": -9.039567798520016e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.91997596898648, "compression/movement_sparsity/model_sparsity": 0.8883719642746788, "compression_loss": 157.53318786621094, "distillation_loss": 6.731488227844238, "epoch": 1.73, "learning_rate": 4.1331360946745563e-05, "loss": 164.1801, "step": 2051, "task_loss": 3.1885368824005127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4714834225946638, "compression/movement_sparsity/importance_threshold": -8.954289421891914e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9200560755446578, "compression/movement_sparsity/model_sparsity": 0.8884493189261465, "compression_loss": 157.56094360351562, "distillation_loss": 6.878177165985107, "epoch": 1.73, "learning_rate": 4.132713440405748e-05, "loss": 164.0798, "step": 2052, "task_loss": 2.3549516201019287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.471753293768057, "compression/movement_sparsity/importance_threshold": -8.869549077395421e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9202378714044341, "compression/movement_sparsity/model_sparsity": 0.8886248695388701, "compression_loss": 157.5885009765625, "distillation_loss": 6.572344779968262, "epoch": 1.73, "learning_rate": 4.13229078613694e-05, "loss": 163.6014, "step": 2053, "task_loss": 2.567559242248535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.472021456901769, "compression/movement_sparsity/importance_threshold": -8.785345062397101e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9203249178281759, "compression/movement_sparsity/model_sparsity": 0.8887089256501702, "compression_loss": 157.61595153808594, "distillation_loss": 5.47068452835083, "epoch": 1.74, "learning_rate": 4.1318681318681316e-05, "loss": 163.2955, "step": 2054, "task_loss": 2.1045193672180176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4722879174181478, "compression/movement_sparsity/importance_threshold": -8.701675674263696e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9203500420493848, "compression/movement_sparsity/model_sparsity": 0.8887331867770893, "compression_loss": 157.64317321777344, "distillation_loss": 5.3615288734436035, "epoch": 1.74, "learning_rate": 4.1314454775993236e-05, "loss": 163.4814, "step": 2055, "task_loss": 2.523104667663574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4725526807395415, "compression/movement_sparsity/importance_threshold": -8.618539210361945e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9204586831407151, "compression/movement_sparsity/model_sparsity": 0.8888380957127134, "compression_loss": 157.670166015625, "distillation_loss": 7.267078876495361, "epoch": 1.74, "learning_rate": 4.131022823330516e-05, "loss": 163.8252, "step": 2056, "task_loss": 3.9273033142089844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4728157522882985, "compression/movement_sparsity/importance_threshold": -8.5359339680585e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9206906320495679, "compression/movement_sparsity/model_sparsity": 0.8890620764629888, "compression_loss": 157.6969757080078, "distillation_loss": 5.640903472900391, "epoch": 1.74, "learning_rate": 4.130600169061708e-05, "loss": 163.6917, "step": 2057, "task_loss": 3.4636130332946777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4730771374867666, "compression/movement_sparsity/importance_threshold": -8.45385824471993e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.920845586607996, "compression/movement_sparsity/model_sparsity": 0.8892117078556389, "compression_loss": 157.72372436523438, "distillation_loss": 6.200067043304443, "epoch": 1.74, "learning_rate": 4.1301775147928995e-05, "loss": 164.0651, "step": 2058, "task_loss": 3.0539345741271973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4733368417572943, "compression/movement_sparsity/importance_threshold": -8.372310337712973e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9209980490153881, "compression/movement_sparsity/model_sparsity": 0.8893589327103079, "compression_loss": 157.75021362304688, "distillation_loss": 6.288613319396973, "epoch": 1.74, "learning_rate": 4.1297548605240915e-05, "loss": 163.8086, "step": 2059, "task_loss": 3.4023566246032715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4735948705222297, "compression/movement_sparsity/importance_threshold": -8.29128854440437e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9211175053267642, "compression/movement_sparsity/model_sparsity": 0.8894742853298976, "compression_loss": 157.77650451660156, "distillation_loss": 6.743499755859375, "epoch": 1.74, "learning_rate": 4.1293322062552834e-05, "loss": 164.5779, "step": 2060, "task_loss": 3.0008227825164795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.473851229203921, "compression/movement_sparsity/importance_threshold": -8.210791162160686e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9212479557207005, "compression/movement_sparsity/model_sparsity": 0.8896002543514899, "compression_loss": 157.802734375, "distillation_loss": 6.0286865234375, "epoch": 1.74, "learning_rate": 4.1289095519864754e-05, "loss": 163.6875, "step": 2061, "task_loss": 2.7711236476898193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4741059232247165, "compression/movement_sparsity/importance_threshold": -8.130816488348663e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9213412861807864, "compression/movement_sparsity/model_sparsity": 0.8896903786231538, "compression_loss": 157.82858276367188, "distillation_loss": 6.389291286468506, "epoch": 1.74, "learning_rate": 4.1284868977176674e-05, "loss": 163.635, "step": 2062, "task_loss": 3.5396840572357178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4743589580069643, "compression/movement_sparsity/importance_threshold": -8.051362820334865e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9213777502854169, "compression/movement_sparsity/model_sparsity": 0.8897255900736135, "compression_loss": 157.85430908203125, "distillation_loss": 6.952322959899902, "epoch": 1.74, "learning_rate": 4.1280642434488594e-05, "loss": 164.5493, "step": 2063, "task_loss": 3.490713596343994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4746103389730125, "compression/movement_sparsity/importance_threshold": -7.972428455486121e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.921456533260987, "compression/movement_sparsity/model_sparsity": 0.889801666611608, "compression_loss": 157.87986755371094, "distillation_loss": 9.75994873046875, "epoch": 1.74, "learning_rate": 4.1276415891800506e-05, "loss": 165.1418, "step": 2064, "task_loss": 4.526374816894531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4748600715452094, "compression/movement_sparsity/importance_threshold": -7.894011691168996e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9215729489096159, "compression/movement_sparsity/model_sparsity": 0.8899140830245701, "compression_loss": 157.90533447265625, "distillation_loss": 6.862576484680176, "epoch": 1.75, "learning_rate": 4.1272189349112426e-05, "loss": 164.3189, "step": 2065, "task_loss": 3.1907973289489746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4751081611459034, "compression/movement_sparsity/importance_threshold": -7.816110824750231e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.921751644485807, "compression/movement_sparsity/model_sparsity": 0.8900866398579871, "compression_loss": 157.9305419921875, "distillation_loss": 8.265615463256836, "epoch": 1.75, "learning_rate": 4.1267962806424346e-05, "loss": 164.766, "step": 2066, "task_loss": 3.42014741897583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.475354613197442, "compression/movement_sparsity/importance_threshold": -7.738724153596478e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9218749284549942, "compression/movement_sparsity/model_sparsity": 0.8902056886435669, "compression_loss": 157.95558166503906, "distillation_loss": 6.264739036560059, "epoch": 1.75, "learning_rate": 4.1263736263736266e-05, "loss": 164.4062, "step": 2067, "task_loss": 3.4520792961120605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4755994331221745, "compression/movement_sparsity/importance_threshold": -7.661849975074304e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9220492597858305, "compression/movement_sparsity/model_sparsity": 0.8903740311568831, "compression_loss": 157.98036193847656, "distillation_loss": 8.067062377929688, "epoch": 1.75, "learning_rate": 4.1259509721048185e-05, "loss": 164.8944, "step": 2068, "task_loss": 3.4594380855560303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4758426263424482, "compression/movement_sparsity/importance_threshold": -7.58548658655045e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.922238126677015, "compression/movement_sparsity/model_sparsity": 0.8905564098893327, "compression_loss": 158.00497436523438, "distillation_loss": 8.489025115966797, "epoch": 1.75, "learning_rate": 4.1255283178360105e-05, "loss": 165.0319, "step": 2069, "task_loss": 3.4526848793029785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4760841982806114, "compression/movement_sparsity/importance_threshold": -7.509632285391653e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9223003231354036, "compression/movement_sparsity/model_sparsity": 0.8906164697080371, "compression_loss": 158.0295867919922, "distillation_loss": 7.891172409057617, "epoch": 1.75, "learning_rate": 4.125105663567202e-05, "loss": 164.8274, "step": 2070, "task_loss": 3.8329386711120605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4763241543590127, "compression/movement_sparsity/importance_threshold": -7.43428536896457e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9224427096211434, "compression/movement_sparsity/model_sparsity": 0.8907539647799597, "compression_loss": 158.053955078125, "distillation_loss": 6.789915561676025, "epoch": 1.75, "learning_rate": 4.124683009298394e-05, "loss": 164.2328, "step": 2071, "task_loss": 3.3769826889038086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4765625, "compression/movement_sparsity/importance_threshold": -7.359444134635851e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9225536163043246, "compression/movement_sparsity/model_sparsity": 0.8908610614773848, "compression_loss": 158.0781707763672, "distillation_loss": 7.640774726867676, "epoch": 1.75, "learning_rate": 4.124260355029586e-05, "loss": 164.7641, "step": 2072, "task_loss": 2.1207337379455566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4767992406259216, "compression/movement_sparsity/importance_threshold": -7.28510687977215e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9226302648538879, "compression/movement_sparsity/model_sparsity": 0.8909350769134721, "compression_loss": 158.10218811035156, "distillation_loss": 7.081815719604492, "epoch": 1.75, "learning_rate": 4.1238377007607784e-05, "loss": 164.846, "step": 2073, "task_loss": 3.063659906387329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4770343816591256, "compression/movement_sparsity/importance_threshold": -7.211271901740122e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9227174185951384, "compression/movement_sparsity/model_sparsity": 0.8910192366555943, "compression_loss": 158.12603759765625, "distillation_loss": 7.716804504394531, "epoch": 1.75, "learning_rate": 4.12341504649197e-05, "loss": 164.3071, "step": 2074, "task_loss": 3.098231792449951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.47726792852196, "compression/movement_sparsity/importance_threshold": -7.137937497906505e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9227878904258664, "compression/movement_sparsity/model_sparsity": 0.8910872875621401, "compression_loss": 158.1495819091797, "distillation_loss": 5.241825103759766, "epoch": 1.75, "learning_rate": 4.122992392223162e-05, "loss": 164.0335, "step": 2075, "task_loss": 2.4045944213867188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4774998866367737, "compression/movement_sparsity/importance_threshold": -7.065101965637866e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9229345219152846, "compression/movement_sparsity/model_sparsity": 0.8912288818088056, "compression_loss": 158.1731414794922, "distillation_loss": 5.257561683654785, "epoch": 1.75, "learning_rate": 4.1225697379543537e-05, "loss": 163.999, "step": 2076, "task_loss": 2.453101396560669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4777302614259142, "compression/movement_sparsity/importance_threshold": -6.992763602300944e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9230235715991892, "compression/movement_sparsity/model_sparsity": 0.8913148723621191, "compression_loss": 158.19631958007812, "distillation_loss": 9.051553726196289, "epoch": 1.76, "learning_rate": 4.122147083685545e-05, "loss": 164.839, "step": 2077, "task_loss": 3.787735939025879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.47795905831173, "compression/movement_sparsity/importance_threshold": -6.920920705262481e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9230572692969282, "compression/movement_sparsity/model_sparsity": 0.8913474124402745, "compression_loss": 158.21945190429688, "distillation_loss": 6.767080307006836, "epoch": 1.76, "learning_rate": 4.1217244294167376e-05, "loss": 164.7223, "step": 2078, "task_loss": 2.7486073970794678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4781862827165693, "compression/movement_sparsity/importance_threshold": -6.849571571889042e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.923190748429444, "compression/movement_sparsity/model_sparsity": 0.8914763061539587, "compression_loss": 158.242431640625, "distillation_loss": 5.395778656005859, "epoch": 1.76, "learning_rate": 4.1213017751479296e-05, "loss": 164.27, "step": 2079, "task_loss": 1.5669970512390137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.47841194006278, "compression/movement_sparsity/importance_threshold": -6.778714499547367e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9234242713284248, "compression/movement_sparsity/model_sparsity": 0.891701806822959, "compression_loss": 158.26522827148438, "distillation_loss": 5.859036445617676, "epoch": 1.76, "learning_rate": 4.120879120879121e-05, "loss": 164.1275, "step": 2080, "task_loss": 2.150235652923584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4786360357727106, "compression/movement_sparsity/importance_threshold": -6.708347785604023e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9235100418662295, "compression/movement_sparsity/model_sparsity": 0.891784630878929, "compression_loss": 158.28778076171875, "distillation_loss": 7.4065937995910645, "epoch": 1.76, "learning_rate": 4.120456466610313e-05, "loss": 164.3823, "step": 2081, "task_loss": 3.0152347087860107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4788585752687093, "compression/movement_sparsity/importance_threshold": -6.638469727425749e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9236629693161594, "compression/movement_sparsity/model_sparsity": 0.891932304800494, "compression_loss": 158.31011962890625, "distillation_loss": 7.203195571899414, "epoch": 1.76, "learning_rate": 4.120033812341505e-05, "loss": 164.8392, "step": 2082, "task_loss": 3.356092691421509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4790795639731242, "compression/movement_sparsity/importance_threshold": -6.569078622379286e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9237899378531461, "compression/movement_sparsity/model_sparsity": 0.8920549115776343, "compression_loss": 158.33250427246094, "distillation_loss": 6.737878322601318, "epoch": 1.76, "learning_rate": 4.119611158072697e-05, "loss": 165.0983, "step": 2083, "task_loss": 3.8402137756347656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4792990073083037, "compression/movement_sparsity/importance_threshold": -6.5001727678312e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9238590026320931, "compression/movement_sparsity/model_sparsity": 0.8921216037689563, "compression_loss": 158.35467529296875, "distillation_loss": 7.76764440536499, "epoch": 1.76, "learning_rate": 4.119188503803889e-05, "loss": 166.1562, "step": 2084, "task_loss": 3.0231523513793945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4795169106965955, "compression/movement_sparsity/importance_threshold": -6.431750461148317e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9239815949995573, "compression/movement_sparsity/model_sparsity": 0.8922399847114599, "compression_loss": 158.37669372558594, "distillation_loss": 5.456912994384766, "epoch": 1.76, "learning_rate": 4.118765849535081e-05, "loss": 164.4382, "step": 2085, "task_loss": 3.8630337715148926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4797332795603482, "compression/movement_sparsity/importance_threshold": -6.363809999697118e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9241175305106062, "compression/movement_sparsity/model_sparsity": 0.8923712504195177, "compression_loss": 158.39859008789062, "distillation_loss": 6.736979961395264, "epoch": 1.76, "learning_rate": 4.118343195266273e-05, "loss": 165.563, "step": 2086, "task_loss": 3.4097113609313965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4799481193219097, "compression/movement_sparsity/importance_threshold": -6.296349680844342e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9241838288826616, "compression/movement_sparsity/model_sparsity": 0.8924352712385354, "compression_loss": 158.42047119140625, "distillation_loss": 7.493833065032959, "epoch": 1.76, "learning_rate": 4.117920540997464e-05, "loss": 164.8506, "step": 2087, "task_loss": 2.2955262660980225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4801614354036285, "compression/movement_sparsity/importance_threshold": -6.22936780195673e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9243525439305414, "compression/movement_sparsity/model_sparsity": 0.8925981904054924, "compression_loss": 158.44212341308594, "distillation_loss": 5.537937164306641, "epoch": 1.76, "learning_rate": 4.117497886728656e-05, "loss": 164.6803, "step": 2088, "task_loss": 2.4924726486206055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.480373233227853, "compression/movement_sparsity/importance_threshold": -6.16286266040076e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.924466777456493, "compression/movement_sparsity/model_sparsity": 0.892708499658404, "compression_loss": 158.46356201171875, "distillation_loss": 7.666211128234863, "epoch": 1.77, "learning_rate": 4.117075232459848e-05, "loss": 165.5147, "step": 2089, "task_loss": 3.7078964710235596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4805835182169307, "compression/movement_sparsity/importance_threshold": -6.096832553543433e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9245956300119661, "compression/movement_sparsity/model_sparsity": 0.8928329257321999, "compression_loss": 158.48486328125, "distillation_loss": 7.490076065063477, "epoch": 1.77, "learning_rate": 4.11665257819104e-05, "loss": 165.6964, "step": 2090, "task_loss": 3.988311767578125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4807922957932103, "compression/movement_sparsity/importance_threshold": -6.0312757787511426e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9247157183042269, "compression/movement_sparsity/model_sparsity": 0.8929488886221867, "compression_loss": 158.50584411621094, "distillation_loss": 9.895221710205078, "epoch": 1.77, "learning_rate": 4.116229923922232e-05, "loss": 165.6142, "step": 2091, "task_loss": 3.813565731048584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4809995713790398, "compression/movement_sparsity/importance_threshold": -5.966190633390628e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9248041002347439, "compression/movement_sparsity/model_sparsity": 0.8930342343614958, "compression_loss": 158.52684020996094, "distillation_loss": 6.144832611083984, "epoch": 1.77, "learning_rate": 4.115807269653424e-05, "loss": 165.2048, "step": 2092, "task_loss": 3.0759212970733643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4812053503967673, "compression/movement_sparsity/importance_threshold": -5.901575414828629e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9248985634907553, "compression/movement_sparsity/model_sparsity": 0.8931254525140602, "compression_loss": 158.54759216308594, "distillation_loss": 6.5753021240234375, "epoch": 1.77, "learning_rate": 4.115384615384615e-05, "loss": 163.7794, "step": 2093, "task_loss": 2.998349905014038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4814096382687416, "compression/movement_sparsity/importance_threshold": -5.8374284204317124e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9250577392045264, "compression/movement_sparsity/model_sparsity": 0.8932791600523815, "compression_loss": 158.5681610107422, "distillation_loss": 5.708560943603516, "epoch": 1.77, "learning_rate": 4.114961961115807e-05, "loss": 164.6118, "step": 2094, "task_loss": 2.953666925430298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.48161244041731, "compression/movement_sparsity/importance_threshold": -5.773747947566705e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9250696991446652, "compression/movement_sparsity/model_sparsity": 0.8932907091317834, "compression_loss": 158.58865356445312, "distillation_loss": 6.482016563415527, "epoch": 1.77, "learning_rate": 4.114539306847e-05, "loss": 164.4768, "step": 2095, "task_loss": 3.642782211303711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4818137622648213, "compression/movement_sparsity/importance_threshold": -5.710532293600173e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9252270385366204, "compression/movement_sparsity/model_sparsity": 0.8934426434315924, "compression_loss": 158.6089630126953, "distillation_loss": 6.840619087219238, "epoch": 1.77, "learning_rate": 4.114116652578191e-05, "loss": 164.3295, "step": 2096, "task_loss": 2.9389097690582275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4820136092336238, "compression/movement_sparsity/importance_threshold": -5.647779755898769e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9252998594283726, "compression/movement_sparsity/model_sparsity": 0.8935129627016896, "compression_loss": 158.62913513183594, "distillation_loss": 6.588991641998291, "epoch": 1.77, "learning_rate": 4.113693998309383e-05, "loss": 164.4038, "step": 2097, "task_loss": 3.1686906814575195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4822119867460652, "compression/movement_sparsity/importance_threshold": -5.5854886318291476e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9254340778592819, "compression/movement_sparsity/model_sparsity": 0.893642570316593, "compression_loss": 158.64907836914062, "distillation_loss": 7.505667209625244, "epoch": 1.77, "learning_rate": 4.113271344040575e-05, "loss": 165.3356, "step": 2098, "task_loss": 2.816650390625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4824089002244938, "compression/movement_sparsity/importance_threshold": -5.5236572187581344e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9254680140403735, "compression/movement_sparsity/model_sparsity": 0.8936753406854643, "compression_loss": 158.6689453125, "distillation_loss": 5.966259956359863, "epoch": 1.77, "learning_rate": 4.1128486897717663e-05, "loss": 164.5278, "step": 2099, "task_loss": 3.4562697410583496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4826043550912582, "compression/movement_sparsity/importance_threshold": -5.4622838140522094e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9255901890619705, "compression/movement_sparsity/model_sparsity": 0.8937933186192151, "compression_loss": 158.68865966796875, "distillation_loss": 5.01706600189209, "epoch": 1.77, "learning_rate": 4.112426035502959e-05, "loss": 164.7906, "step": 2100, "task_loss": 1.6183031797409058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4827983567687062, "compression/movement_sparsity/importance_threshold": -5.4013667150781125e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9255869456883736, "compression/movement_sparsity/model_sparsity": 0.8937901866654789, "compression_loss": 158.70819091796875, "distillation_loss": 7.5991363525390625, "epoch": 1.78, "learning_rate": 4.112003381234151e-05, "loss": 164.6592, "step": 2101, "task_loss": 4.006626605987549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.482990910679186, "compression/movement_sparsity/importance_threshold": -5.3409042192026704e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9256177696617123, "compression/movement_sparsity/model_sparsity": 0.8938199517405079, "compression_loss": 158.72752380371094, "distillation_loss": 6.564167022705078, "epoch": 1.78, "learning_rate": 4.111580726965343e-05, "loss": 164.2414, "step": 2102, "task_loss": 3.2785227298736572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.483182022245046, "compression/movement_sparsity/importance_threshold": -5.280894623792276e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9256572148082518, "compression/movement_sparsity/model_sparsity": 0.8938580418249162, "compression_loss": 158.74671936035156, "distillation_loss": 6.228639602661133, "epoch": 1.78, "learning_rate": 4.111158072696534e-05, "loss": 164.8619, "step": 2103, "task_loss": 3.1832244396209717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4833716968886344, "compression/movement_sparsity/importance_threshold": -5.221336226213843e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9256880984024286, "compression/movement_sparsity/model_sparsity": 0.893887864472624, "compression_loss": 158.76576232910156, "distillation_loss": 7.8237504959106445, "epoch": 1.78, "learning_rate": 4.110735418427726e-05, "loss": 165.655, "step": 2104, "task_loss": 3.245042324066162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4835599400322992, "compression/movement_sparsity/importance_threshold": -5.162227323833937e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.925858792862136, "compression/movement_sparsity/model_sparsity": 0.8940526950525229, "compression_loss": 158.78469848632812, "distillation_loss": 8.02081298828125, "epoch": 1.78, "learning_rate": 4.110312764158918e-05, "loss": 166.3767, "step": 2105, "task_loss": 4.341396808624268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4837467570983887, "compression/movement_sparsity/importance_threshold": -5.1035662140192124e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.925900253193006, "compression/movement_sparsity/model_sparsity": 0.8940927310934805, "compression_loss": 158.80349731445312, "distillation_loss": 4.749598503112793, "epoch": 1.78, "learning_rate": 4.10989010989011e-05, "loss": 164.0564, "step": 2106, "task_loss": 3.175304889678955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4839321535092513, "compression/movement_sparsity/importance_threshold": -5.0453511941363216e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9260122688237772, "compression/movement_sparsity/model_sparsity": 0.8942008986427344, "compression_loss": 158.82200622558594, "distillation_loss": 5.590914249420166, "epoch": 1.78, "learning_rate": 4.109467455621302e-05, "loss": 164.4343, "step": 2107, "task_loss": 3.014348030090332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4841161346872347, "compression/movement_sparsity/importance_threshold": -4.9875805615520914e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9261388796357348, "compression/movement_sparsity/model_sparsity": 0.8943231599838009, "compression_loss": 158.84027099609375, "distillation_loss": 7.44611120223999, "epoch": 1.78, "learning_rate": 4.109044801352494e-05, "loss": 165.0043, "step": 2108, "task_loss": 4.226390361785889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4842987060546875, "compression/movement_sparsity/importance_threshold": -4.9302526136330016e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9263273530293873, "compression/movement_sparsity/model_sparsity": 0.8945051587365694, "compression_loss": 158.85853576660156, "distillation_loss": 6.763064384460449, "epoch": 1.78, "learning_rate": 4.1086221470836854e-05, "loss": 165.3203, "step": 2109, "task_loss": 4.198666095733643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4844798730339577, "compression/movement_sparsity/importance_threshold": -4.873365647745879e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9264239507114054, "compression/movement_sparsity/model_sparsity": 0.894598437991041, "compression_loss": 158.87661743164062, "distillation_loss": 6.2183685302734375, "epoch": 1.78, "learning_rate": 4.1081994928148774e-05, "loss": 165.4624, "step": 2110, "task_loss": 2.8930840492248535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4846596410473936, "compression/movement_sparsity/importance_threshold": -4.8169179612572895e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9265281440882076, "compression/movement_sparsity/model_sparsity": 0.8946990520048137, "compression_loss": 158.89450073242188, "distillation_loss": 5.854990482330322, "epoch": 1.78, "learning_rate": 4.1077768385460694e-05, "loss": 164.8618, "step": 2111, "task_loss": 2.8054862022399902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4848380155173435, "compression/movement_sparsity/importance_threshold": -4.760907851533887e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9266214626241258, "compression/movement_sparsity/model_sparsity": 0.8947891647619417, "compression_loss": 158.91212463378906, "distillation_loss": 6.993115425109863, "epoch": 1.78, "learning_rate": 4.107354184277261e-05, "loss": 165.0282, "step": 2112, "task_loss": 3.1597177982330322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4850150018661554, "compression/movement_sparsity/importance_threshold": -4.7053336159424974e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9267490392936619, "compression/movement_sparsity/model_sparsity": 0.8949123587804075, "compression_loss": 158.92962646484375, "distillation_loss": 7.471574306488037, "epoch": 1.79, "learning_rate": 4.106931530008453e-05, "loss": 165.3545, "step": 2113, "task_loss": 2.915780782699585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4851906055161774, "compression/movement_sparsity/importance_threshold": -4.650193551849688e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9268468770891142, "compression/movement_sparsity/model_sparsity": 0.8950068355466018, "compression_loss": 158.94700622558594, "distillation_loss": 7.544171333312988, "epoch": 1.79, "learning_rate": 4.106508875739645e-05, "loss": 165.4697, "step": 2114, "task_loss": 3.156970500946045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.485364831889758, "compression/movement_sparsity/importance_threshold": -4.595485956622111e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9269344839487349, "compression/movement_sparsity/model_sparsity": 0.8950914328410843, "compression_loss": 158.9642791748047, "distillation_loss": 7.283099174499512, "epoch": 1.79, "learning_rate": 4.106086221470837e-05, "loss": 165.5411, "step": 2115, "task_loss": 3.2034311294555664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4855376864092453, "compression/movement_sparsity/importance_threshold": -4.541209127626421e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.926940636819235, "compression/movement_sparsity/model_sparsity": 0.8950973743415542, "compression_loss": 158.9813995361328, "distillation_loss": 8.349763870239258, "epoch": 1.79, "learning_rate": 4.1056635672020285e-05, "loss": 165.8371, "step": 2116, "task_loss": 3.8338661193847656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4857091744969875, "compression/movement_sparsity/importance_threshold": -4.4873613622293564e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9269877611297319, "compression/movement_sparsity/model_sparsity": 0.8951428797870142, "compression_loss": 158.99832153320312, "distillation_loss": 9.298206329345703, "epoch": 1.79, "learning_rate": 4.105240912933221e-05, "loss": 166.3419, "step": 2117, "task_loss": 3.6676175594329834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4858793015753324, "compression/movement_sparsity/importance_threshold": -4.433940957797658e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9271289194262052, "compression/movement_sparsity/model_sparsity": 0.8952791888617501, "compression_loss": 159.0152130126953, "distillation_loss": 7.369715690612793, "epoch": 1.79, "learning_rate": 4.104818258664413e-05, "loss": 165.7438, "step": 2118, "task_loss": 3.514754056930542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4860480730666286, "compression/movement_sparsity/importance_threshold": -4.380946211697805e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9271045225792223, "compression/movement_sparsity/model_sparsity": 0.8952556301215144, "compression_loss": 159.03184509277344, "distillation_loss": 6.515574932098389, "epoch": 1.79, "learning_rate": 4.1043956043956045e-05, "loss": 165.6759, "step": 2119, "task_loss": 3.5238170623779297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4862154943932244, "compression/movement_sparsity/importance_threshold": -4.328375421296625e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9272547551672665, "compression/movement_sparsity/model_sparsity": 0.8954007017579898, "compression_loss": 159.0485076904297, "distillation_loss": 5.713984489440918, "epoch": 1.79, "learning_rate": 4.1039729501267964e-05, "loss": 165.5071, "step": 2120, "task_loss": 2.8248162269592285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4863815709774677, "compression/movement_sparsity/importance_threshold": -4.2762268839606835e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9274026744667894, "compression/movement_sparsity/model_sparsity": 0.895543539574521, "compression_loss": 159.0647735595703, "distillation_loss": 6.610176086425781, "epoch": 1.79, "learning_rate": 4.1035502958579884e-05, "loss": 166.9932, "step": 2121, "task_loss": 3.3706016540527344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.486546308241707, "compression/movement_sparsity/importance_threshold": -4.224498897056721e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9275124125815423, "compression/movement_sparsity/model_sparsity": 0.8956495078474382, "compression_loss": 159.08111572265625, "distillation_loss": 5.752680778503418, "epoch": 1.79, "learning_rate": 4.1031276415891804e-05, "loss": 165.367, "step": 2122, "task_loss": 3.135019302368164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4867097116082901, "compression/movement_sparsity/importance_threshold": -4.1731897579514766e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9276375924933835, "compression/movement_sparsity/model_sparsity": 0.8957703874442093, "compression_loss": 159.09716796875, "distillation_loss": 7.08063268661499, "epoch": 1.79, "learning_rate": 4.1027049873203724e-05, "loss": 165.3837, "step": 2123, "task_loss": 3.801912307739258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4868717864995658, "compression/movement_sparsity/importance_threshold": -4.122297764011431e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9278611348640531, "compression/movement_sparsity/model_sparsity": 0.8959862504467496, "compression_loss": 159.1132354736328, "distillation_loss": 5.206726551055908, "epoch": 1.79, "learning_rate": 4.102282333051564e-05, "loss": 165.7805, "step": 2124, "task_loss": 3.422966480255127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4870325383378815, "compression/movement_sparsity/importance_threshold": -4.0718212126034103e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9279742594424145, "compression/movement_sparsity/model_sparsity": 0.8960954888478324, "compression_loss": 159.12901306152344, "distillation_loss": 6.676248073577881, "epoch": 1.8, "learning_rate": 4.1018596787827556e-05, "loss": 164.8561, "step": 2125, "task_loss": 3.43192458152771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.487191972545586, "compression/movement_sparsity/importance_threshold": -4.021758401094068e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9279257280801365, "compression/movement_sparsity/model_sparsity": 0.8960486246871486, "compression_loss": 159.1446990966797, "distillation_loss": 6.5509114265441895, "epoch": 1.8, "learning_rate": 4.1014370245139476e-05, "loss": 164.9735, "step": 2126, "task_loss": 3.757880449295044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.487350094545027, "compression/movement_sparsity/importance_threshold": -3.972107626850057e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9280090780119112, "compression/movement_sparsity/model_sparsity": 0.8961291112923525, "compression_loss": 159.16030883789062, "distillation_loss": 5.604918003082275, "epoch": 1.8, "learning_rate": 4.1010143702451396e-05, "loss": 164.6136, "step": 2127, "task_loss": 3.319204330444336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4875069097585532, "compression/movement_sparsity/importance_threshold": -3.9228671872380307e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9281407565951141, "compression/movement_sparsity/model_sparsity": 0.8962562663111315, "compression_loss": 159.17579650878906, "distillation_loss": 6.250582695007324, "epoch": 1.8, "learning_rate": 4.1005917159763316e-05, "loss": 165.7747, "step": 2128, "task_loss": 3.2477340698242188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4876624236085125, "compression/movement_sparsity/importance_threshold": -3.874035379624642e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9283059063168707, "compression/movement_sparsity/model_sparsity": 0.896415742631886, "compression_loss": 159.19119262695312, "distillation_loss": 7.016054153442383, "epoch": 1.8, "learning_rate": 4.1001690617075235e-05, "loss": 165.5315, "step": 2129, "task_loss": 2.550827980041504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4878166415172533, "compression/movement_sparsity/importance_threshold": -3.825610501376631e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9283790253128138, "compression/movement_sparsity/model_sparsity": 0.8964863497653781, "compression_loss": 159.20648193359375, "distillation_loss": 5.557750701904297, "epoch": 1.8, "learning_rate": 4.0997464074387155e-05, "loss": 164.8011, "step": 2130, "task_loss": 2.319732666015625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4879695689071237, "compression/movement_sparsity/importance_threshold": -3.7775908498606514e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9284869628782537, "compression/movement_sparsity/model_sparsity": 0.8965905793433903, "compression_loss": 159.22161865234375, "distillation_loss": 4.993691444396973, "epoch": 1.8, "learning_rate": 4.0993237531699075e-05, "loss": 165.5109, "step": 2131, "task_loss": 2.7405595779418945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4881212112004718, "compression/movement_sparsity/importance_threshold": -3.7299747224433556e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9285709567150807, "compression/movement_sparsity/model_sparsity": 0.896671687733527, "compression_loss": 159.23660278320312, "distillation_loss": 6.839860916137695, "epoch": 1.8, "learning_rate": 4.098901098901099e-05, "loss": 165.1701, "step": 2132, "task_loss": 3.1859803199768066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.488271573819646, "compression/movement_sparsity/importance_threshold": -3.682760416491397e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9287030287958156, "compression/movement_sparsity/model_sparsity": 0.8967992227319873, "compression_loss": 159.2515106201172, "distillation_loss": 6.459246635437012, "epoch": 1.8, "learning_rate": 4.098478444632291e-05, "loss": 164.8118, "step": 2133, "task_loss": 2.9032678604125977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4884206621869942, "compression/movement_sparsity/importance_threshold": -3.635946229371429e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9287462539034955, "compression/movement_sparsity/model_sparsity": 0.8968409629242425, "compression_loss": 159.26617431640625, "distillation_loss": 7.230327129364014, "epoch": 1.8, "learning_rate": 4.0980557903634834e-05, "loss": 165.3036, "step": 2134, "task_loss": 2.9536778926849365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4885684817248646, "compression/movement_sparsity/importance_threshold": -3.589530458450278e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9288265750966906, "compression/movement_sparsity/model_sparsity": 0.8969185248373546, "compression_loss": 159.28060913085938, "distillation_loss": 7.201566696166992, "epoch": 1.8, "learning_rate": 4.097633136094675e-05, "loss": 165.8674, "step": 2135, "task_loss": 4.045864582061768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4887150378556058, "compression/movement_sparsity/importance_threshold": -3.5435114010943375e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9288811043152895, "compression/movement_sparsity/model_sparsity": 0.8969711808095431, "compression_loss": 159.2950897216797, "distillation_loss": 6.321192264556885, "epoch": 1.81, "learning_rate": 4.097210481825867e-05, "loss": 165.7875, "step": 2136, "task_loss": 3.1612582206726074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4888603360015658, "compression/movement_sparsity/importance_threshold": -3.497887354670607e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9289498709900456, "compression/movement_sparsity/model_sparsity": 0.8970375851374701, "compression_loss": 159.30931091308594, "distillation_loss": 4.523503303527832, "epoch": 1.81, "learning_rate": 4.0967878275570586e-05, "loss": 164.9672, "step": 2137, "task_loss": 2.5860769748687744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4890043815850924, "compression/movement_sparsity/importance_threshold": -3.45265661654548e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9290225010951155, "compression/movement_sparsity/model_sparsity": 0.8971077201749947, "compression_loss": 159.32337951660156, "distillation_loss": 5.124682426452637, "epoch": 1.81, "learning_rate": 4.09636517328825e-05, "loss": 164.9088, "step": 2138, "task_loss": 2.836557149887085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4891471800285343, "compression/movement_sparsity/importance_threshold": -3.407817484085869e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9291000678055868, "compression/movement_sparsity/model_sparsity": 0.8971826222303382, "compression_loss": 159.33731079101562, "distillation_loss": 7.0748677253723145, "epoch": 1.81, "learning_rate": 4.0959425190194426e-05, "loss": 165.7097, "step": 2139, "task_loss": 2.9615187644958496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4892887367542396, "compression/movement_sparsity/importance_threshold": -3.363368254658168e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9292787514576103, "compression/movement_sparsity/model_sparsity": 0.8973551675492194, "compression_loss": 159.35110473632812, "distillation_loss": 6.316110610961914, "epoch": 1.81, "learning_rate": 4.0955198647506346e-05, "loss": 165.8481, "step": 2140, "task_loss": 2.997666835784912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4894290571845565, "compression/movement_sparsity/importance_threshold": -3.31930722562929e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9294088322023499, "compression/movement_sparsity/model_sparsity": 0.897480779620202, "compression_loss": 159.36483764648438, "distillation_loss": 6.787198066711426, "epoch": 1.81, "learning_rate": 4.095097210481826e-05, "loss": 165.29, "step": 2141, "task_loss": 2.8411874771118164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.489568146741833, "compression/movement_sparsity/importance_threshold": -3.275632694365888e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9294683099505175, "compression/movement_sparsity/model_sparsity": 0.8975382141247452, "compression_loss": 159.37843322753906, "distillation_loss": 6.278470039367676, "epoch": 1.81, "learning_rate": 4.094674556213018e-05, "loss": 165.6805, "step": 2142, "task_loss": 3.4164271354675293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4897060108484173, "compression/movement_sparsity/importance_threshold": -3.2323429582344423e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.929585560290881, "compression/movement_sparsity/model_sparsity": 0.897651436555213, "compression_loss": 159.39183044433594, "distillation_loss": 5.80866813659668, "epoch": 1.81, "learning_rate": 4.09425190194421e-05, "loss": 166.2152, "step": 2143, "task_loss": 2.716982364654541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.489842654926658, "compression/movement_sparsity/importance_threshold": -3.189436314601779e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9296640570864279, "compression/movement_sparsity/model_sparsity": 0.8977272367443484, "compression_loss": 159.4051971435547, "distillation_loss": 5.769543647766113, "epoch": 1.81, "learning_rate": 4.093829247675402e-05, "loss": 164.9926, "step": 2144, "task_loss": 3.2212955951690674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4899780843989028, "compression/movement_sparsity/importance_threshold": -3.146911060834638e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9297567078689586, "compression/movement_sparsity/model_sparsity": 0.897816704687472, "compression_loss": 159.41836547851562, "distillation_loss": 5.428375720977783, "epoch": 1.81, "learning_rate": 4.093406593406594e-05, "loss": 165.3882, "step": 2145, "task_loss": 3.019425630569458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4901123046875, "compression/movement_sparsity/importance_threshold": -3.1047654942994996e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9297966538305387, "compression/movement_sparsity/model_sparsity": 0.8978552783823837, "compression_loss": 159.43136596679688, "distillation_loss": 5.688624382019043, "epoch": 1.81, "learning_rate": 4.092983939137786e-05, "loss": 165.2465, "step": 2146, "task_loss": 2.6111912727355957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.490245321214798, "compression/movement_sparsity/importance_threshold": -3.06299791236319e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9299867250626543, "compression/movement_sparsity/model_sparsity": 0.8980388200829487, "compression_loss": 159.44424438476562, "distillation_loss": 5.891379356384277, "epoch": 1.81, "learning_rate": 4.092561284868978e-05, "loss": 165.4216, "step": 2147, "task_loss": 2.8257851600646973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4903771394031449, "compression/movement_sparsity/importance_threshold": -3.0216066123922754e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9300391556277492, "compression/movement_sparsity/model_sparsity": 0.8980894494968372, "compression_loss": 159.45693969726562, "distillation_loss": 7.017194747924805, "epoch": 1.82, "learning_rate": 4.092138630600169e-05, "loss": 165.6185, "step": 2148, "task_loss": 3.9574997425079346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.490507764674889, "compression/movement_sparsity/importance_threshold": -2.9805898917534962e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9300757985948942, "compression/movement_sparsity/model_sparsity": 0.8981248336653339, "compression_loss": 159.469482421875, "distillation_loss": 6.699184417724609, "epoch": 1.82, "learning_rate": 4.091715976331361e-05, "loss": 165.5337, "step": 2149, "task_loss": 3.262906551361084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4906372024523782, "compression/movement_sparsity/importance_threshold": -2.9399460478134187e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.930150992396006, "compression/movement_sparsity/model_sparsity": 0.8981974443280543, "compression_loss": 159.48208618164062, "distillation_loss": 5.176021099090576, "epoch": 1.82, "learning_rate": 4.091293322062553e-05, "loss": 165.3814, "step": 2150, "task_loss": 1.9097723960876465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.490765458157961, "compression/movement_sparsity/importance_threshold": -2.8996733779388696e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9302515727500145, "compression/movement_sparsity/model_sparsity": 0.8982945694374812, "compression_loss": 159.49436950683594, "distillation_loss": 6.5063300132751465, "epoch": 1.82, "learning_rate": 4.090870667793745e-05, "loss": 165.2179, "step": 2151, "task_loss": 3.656248092651367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4908925372139854, "compression/movement_sparsity/importance_threshold": -2.859770179496502e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9303478484795065, "compression/movement_sparsity/model_sparsity": 0.8983875377994863, "compression_loss": 159.5064239501953, "distillation_loss": 6.440917015075684, "epoch": 1.82, "learning_rate": 4.090448013524937e-05, "loss": 165.1823, "step": 2152, "task_loss": 3.414710283279419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4910184450427997, "compression/movement_sparsity/importance_threshold": -2.8202347498528825e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9304561199216401, "compression/movement_sparsity/model_sparsity": 0.8984920897845008, "compression_loss": 159.51849365234375, "distillation_loss": 7.817091464996338, "epoch": 1.82, "learning_rate": 4.090025359256129e-05, "loss": 166.7656, "step": 2153, "task_loss": 4.013523578643799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.491143187066752, "compression/movement_sparsity/importance_threshold": -2.7810653863748376e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9304956962340235, "compression/movement_sparsity/model_sparsity": 0.8985303065288028, "compression_loss": 159.5303497314453, "distillation_loss": 5.37233304977417, "epoch": 1.82, "learning_rate": 4.08960270498732e-05, "loss": 165.4407, "step": 2154, "task_loss": 2.8311851024627686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4912667687081906, "compression/movement_sparsity/importance_threshold": -2.7422603864287604e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9306233444485654, "compression/movement_sparsity/model_sparsity": 0.8986535696344834, "compression_loss": 159.54196166992188, "distillation_loss": 6.927384376525879, "epoch": 1.82, "learning_rate": 4.089180050718512e-05, "loss": 166.795, "step": 2155, "task_loss": 3.552914619445801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4913891953894636, "compression/movement_sparsity/importance_threshold": -2.703818047381651e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.930682762575895, "compression/movement_sparsity/model_sparsity": 0.8987109465663475, "compression_loss": 159.5537109375, "distillation_loss": 5.755553245544434, "epoch": 1.82, "learning_rate": 4.088757396449705e-05, "loss": 165.1994, "step": 2156, "task_loss": 3.149372100830078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4915104725329194, "compression/movement_sparsity/importance_threshold": -2.665736666599989e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9307303519289297, "compression/movement_sparsity/model_sparsity": 0.8987569010787035, "compression_loss": 159.565185546875, "distillation_loss": 4.5558576583862305, "epoch": 1.82, "learning_rate": 4.088334742180896e-05, "loss": 164.7463, "step": 2157, "task_loss": 2.1252498626708984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4916306055609059, "compression/movement_sparsity/importance_threshold": -2.6280145414504276e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9307655162992878, "compression/movement_sparsity/model_sparsity": 0.8987908574447617, "compression_loss": 159.57650756835938, "distillation_loss": 6.22182035446167, "epoch": 1.82, "learning_rate": 4.087912087912088e-05, "loss": 165.9829, "step": 2158, "task_loss": 3.830855369567871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4917495998957713, "compression/movement_sparsity/importance_threshold": -2.5906499692997936e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9308491762594211, "compression/movement_sparsity/model_sparsity": 0.8988716434278962, "compression_loss": 159.58770751953125, "distillation_loss": 6.640441417694092, "epoch": 1.82, "learning_rate": 4.08748943364328e-05, "loss": 165.9178, "step": 2159, "task_loss": 2.7593860626220703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4918674609598641, "compression/movement_sparsity/importance_threshold": -2.5536412475146535e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9308950604564838, "compression/movement_sparsity/model_sparsity": 0.8989159513616336, "compression_loss": 159.5988311767578, "distillation_loss": 7.90350341796875, "epoch": 1.83, "learning_rate": 4.087066779374472e-05, "loss": 165.9282, "step": 2160, "task_loss": 4.193729877471924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4919841941755325, "compression/movement_sparsity/importance_threshold": -2.5169866734616603e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9310662915037349, "compression/movement_sparsity/model_sparsity": 0.8990813000956432, "compression_loss": 159.60995483398438, "distillation_loss": 4.9071807861328125, "epoch": 1.83, "learning_rate": 4.086644125105663e-05, "loss": 165.5444, "step": 2161, "task_loss": 1.8456387519836426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4920998049651244, "compression/movement_sparsity/importance_threshold": -2.480684544507554e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9310616053058539, "compression/movement_sparsity/model_sparsity": 0.8990767748830759, "compression_loss": 159.62083435058594, "distillation_loss": 8.28921127319336, "epoch": 1.83, "learning_rate": 4.086221470836856e-05, "loss": 165.9566, "step": 2162, "task_loss": 4.00072717666626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4922142987509879, "compression/movement_sparsity/importance_threshold": -2.444733158018901e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9311380153720645, "compression/movement_sparsity/model_sparsity": 0.8991505600284474, "compression_loss": 159.6317138671875, "distillation_loss": 5.234555721282959, "epoch": 1.83, "learning_rate": 4.085798816568048e-05, "loss": 165.4295, "step": 2163, "task_loss": 2.0783655643463135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4923276809554715, "compression/movement_sparsity/importance_threshold": -2.409130811362615e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9312478131076556, "compression/movement_sparsity/model_sparsity": 0.8992565858740434, "compression_loss": 159.642578125, "distillation_loss": 6.005222320556641, "epoch": 1.83, "learning_rate": 4.085376162299239e-05, "loss": 165.5309, "step": 2164, "task_loss": 1.9089618921279907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4924399570009235, "compression/movement_sparsity/importance_threshold": -2.3738758019050017e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9313164009198971, "compression/movement_sparsity/model_sparsity": 0.8993228174839337, "compression_loss": 159.6532440185547, "distillation_loss": 8.237293243408203, "epoch": 1.83, "learning_rate": 4.084953508030431e-05, "loss": 166.4981, "step": 2165, "task_loss": 3.315074920654297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4925511323096916, "compression/movement_sparsity/importance_threshold": -2.3389664270130617e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9313727664603118, "compression/movement_sparsity/model_sparsity": 0.8993772466946345, "compression_loss": 159.6637725830078, "distillation_loss": 4.860881805419922, "epoch": 1.83, "learning_rate": 4.084530853761623e-05, "loss": 165.6496, "step": 2166, "task_loss": 2.7041468620300293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4926612123041245, "compression/movement_sparsity/importance_threshold": -2.3044009840532745e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9314149183929046, "compression/movement_sparsity/model_sparsity": 0.8994179505786681, "compression_loss": 159.6742706298828, "distillation_loss": 5.192461013793945, "epoch": 1.83, "learning_rate": 4.084108199492815e-05, "loss": 165.925, "step": 2167, "task_loss": 2.4513585567474365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4927702024065703, "compression/movement_sparsity/importance_threshold": -2.27017777039238e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9314703538482437, "compression/movement_sparsity/model_sparsity": 0.899471481655577, "compression_loss": 159.68455505371094, "distillation_loss": 5.959852695465088, "epoch": 1.83, "learning_rate": 4.083685545224007e-05, "loss": 165.7661, "step": 2168, "task_loss": 2.960866928100586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4928781080393767, "compression/movement_sparsity/importance_threshold": -2.2362950833970316e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9315436636308689, "compression/movement_sparsity/model_sparsity": 0.8995422730216418, "compression_loss": 159.69476318359375, "distillation_loss": 6.3040618896484375, "epoch": 1.83, "learning_rate": 4.083262890955199e-05, "loss": 165.6965, "step": 2169, "task_loss": 3.5666940212249756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4929849346248927, "compression/movement_sparsity/importance_threshold": -2.202751220433969e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.93166289338306, "compression/movement_sparsity/model_sparsity": 0.8996574068650515, "compression_loss": 159.7049560546875, "distillation_loss": 6.221062660217285, "epoch": 1.83, "learning_rate": 4.0828402366863904e-05, "loss": 166.3867, "step": 2170, "task_loss": 3.006350040435791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.493090687585466, "compression/movement_sparsity/importance_threshold": -2.1695444788697588e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9317134757021713, "compression/movement_sparsity/model_sparsity": 0.8997062515258919, "compression_loss": 159.71481323242188, "distillation_loss": 5.142766952514648, "epoch": 1.83, "learning_rate": 4.0824175824175824e-05, "loss": 165.5593, "step": 2171, "task_loss": 2.453266143798828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4931953723434446, "compression/movement_sparsity/importance_threshold": -2.1366731560711408e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9317848656938073, "compression/movement_sparsity/model_sparsity": 0.8997751890516938, "compression_loss": 159.72474670410156, "distillation_loss": 5.263578414916992, "epoch": 1.84, "learning_rate": 4.0819949281487743e-05, "loss": 165.2598, "step": 2172, "task_loss": 2.365400552749634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4932989943211772, "compression/movement_sparsity/importance_threshold": -2.1041355494047682e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9317053791923465, "compression/movement_sparsity/model_sparsity": 0.8996984331560874, "compression_loss": 159.73452758789062, "distillation_loss": 5.399177551269531, "epoch": 1.84, "learning_rate": 4.081572273879967e-05, "loss": 165.5539, "step": 2173, "task_loss": 2.463406801223755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4934015589410117, "compression/movement_sparsity/importance_threshold": -2.0719299562372943e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9317769957431675, "compression/movement_sparsity/model_sparsity": 0.8997675894580695, "compression_loss": 159.74412536621094, "distillation_loss": 6.036779403686523, "epoch": 1.84, "learning_rate": 4.081149619611158e-05, "loss": 166.406, "step": 2174, "task_loss": 3.9989817142486572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4935030716252964, "compression/movement_sparsity/importance_threshold": -2.040054673935459e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9318264810388563, "compression/movement_sparsity/model_sparsity": 0.8998153747816168, "compression_loss": 159.7536163330078, "distillation_loss": 6.504644393920898, "epoch": 1.84, "learning_rate": 4.08072696534235e-05, "loss": 166.4019, "step": 2175, "task_loss": 3.4047913551330566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4936035377963792, "compression/movement_sparsity/importance_threshold": -2.0085079998659154e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9318922666717034, "compression/movement_sparsity/model_sparsity": 0.8998789004755953, "compression_loss": 159.7630615234375, "distillation_loss": 5.924861907958984, "epoch": 1.84, "learning_rate": 4.080304311073542e-05, "loss": 165.715, "step": 2176, "task_loss": 3.7973406314849854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4937029628766088, "compression/movement_sparsity/importance_threshold": -1.97728823139523e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.931958100001221, "compression/movement_sparsity/model_sparsity": 0.899942472227717, "compression_loss": 159.77247619628906, "distillation_loss": 7.481154441833496, "epoch": 1.84, "learning_rate": 4.0798816568047335e-05, "loss": 166.6641, "step": 2177, "task_loss": 3.8168303966522217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.493801352288333, "compression/movement_sparsity/importance_threshold": -1.9463936658901428e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9319233291283948, "compression/movement_sparsity/model_sparsity": 0.89990889584134, "compression_loss": 159.78163146972656, "distillation_loss": 6.10257625579834, "epoch": 1.84, "learning_rate": 4.0794590025359255e-05, "loss": 166.901, "step": 2178, "task_loss": 3.3615942001342773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4938987114539004, "compression/movement_sparsity/importance_threshold": -1.9158226007173935e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9319617130240146, "compression/movement_sparsity/model_sparsity": 0.8999459611320626, "compression_loss": 159.79087829589844, "distillation_loss": 9.399238586425781, "epoch": 1.84, "learning_rate": 4.079036348267118e-05, "loss": 166.5547, "step": 2179, "task_loss": 4.1643900871276855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4939950457956588, "compression/movement_sparsity/importance_threshold": -1.885573333243549e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9320233848190274, "compression/movement_sparsity/model_sparsity": 0.900005514311192, "compression_loss": 159.7999725341797, "distillation_loss": 5.253705024719238, "epoch": 1.84, "learning_rate": 4.0786136939983095e-05, "loss": 165.3041, "step": 2180, "task_loss": 3.240295171737671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4940903607359564, "compression/movement_sparsity/importance_threshold": -1.8556441608354353e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9320854620357397, "compression/movement_sparsity/model_sparsity": 0.9000654589845384, "compression_loss": 159.80892944335938, "distillation_loss": 5.829642295837402, "epoch": 1.84, "learning_rate": 4.0781910397295014e-05, "loss": 166.2426, "step": 2181, "task_loss": 3.580606698989868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4941846616971417, "compression/movement_sparsity/importance_threshold": -1.826033380859446e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9321952359229955, "compression/movement_sparsity/model_sparsity": 0.9001714618010629, "compression_loss": 159.8177032470703, "distillation_loss": 8.56905746459961, "epoch": 1.84, "learning_rate": 4.0777683854606934e-05, "loss": 166.9621, "step": 2182, "task_loss": 3.6865451335906982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4942779541015625, "compression/movement_sparsity/importance_threshold": -1.7967392906825808e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9322956254903217, "compression/movement_sparsity/model_sparsity": 0.9002684026779172, "compression_loss": 159.82647705078125, "distillation_loss": 5.72161865234375, "epoch": 1.84, "learning_rate": 4.077345731191885e-05, "loss": 166.4137, "step": 2183, "task_loss": 3.208189010620117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4943702433715673, "compression/movement_sparsity/importance_threshold": -1.7677601876713196e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9323374435462207, "compression/movement_sparsity/model_sparsity": 0.9003087841549486, "compression_loss": 159.83514404296875, "distillation_loss": 7.543937683105469, "epoch": 1.85, "learning_rate": 4.0769230769230773e-05, "loss": 165.4243, "step": 2184, "task_loss": 3.0690970420837402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4944615349295043, "compression/movement_sparsity/importance_threshold": -1.7390943691923157e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9324379881277264, "compression/movement_sparsity/model_sparsity": 0.9004058747207682, "compression_loss": 159.8437042236328, "distillation_loss": 6.812084674835205, "epoch": 1.85, "learning_rate": 4.076500422654269e-05, "loss": 165.7827, "step": 2185, "task_loss": 2.6723556518554688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4945518341977215, "compression/movement_sparsity/importance_threshold": -1.7107401326123955e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9325237228930282, "compression/movement_sparsity/model_sparsity": 0.9004886642331309, "compression_loss": 159.85211181640625, "distillation_loss": 7.798141956329346, "epoch": 1.85, "learning_rate": 4.0760777683854606e-05, "loss": 166.3794, "step": 2186, "task_loss": 3.6036007404327393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4946411465985672, "compression/movement_sparsity/importance_threshold": -1.682695775298039e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9325968061164684, "compression/movement_sparsity/model_sparsity": 0.9005592368230156, "compression_loss": 159.8603973388672, "distillation_loss": 6.098156929016113, "epoch": 1.85, "learning_rate": 4.0756551141166526e-05, "loss": 166.3632, "step": 2187, "task_loss": 1.9365739822387695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4947294775543898, "compression/movement_sparsity/importance_threshold": -1.6549595946160726e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9327273042070753, "compression/movement_sparsity/model_sparsity": 0.9006852519027512, "compression_loss": 159.8687286376953, "distillation_loss": 7.246183395385742, "epoch": 1.85, "learning_rate": 4.0752324598478446e-05, "loss": 166.2349, "step": 2188, "task_loss": 2.444012403488159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.494816832487537, "compression/movement_sparsity/importance_threshold": -1.627529887933063e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9327448208093324, "compression/movement_sparsity/model_sparsity": 0.9007021667558334, "compression_loss": 159.8767547607422, "distillation_loss": 8.717750549316406, "epoch": 1.85, "learning_rate": 4.0748098055790365e-05, "loss": 166.6618, "step": 2189, "task_loss": 3.457547664642334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4949032168203573, "compression/movement_sparsity/importance_threshold": -1.60040495261575e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9328447572582885, "compression/movement_sparsity/model_sparsity": 0.9007986700803273, "compression_loss": 159.8848114013672, "distillation_loss": 6.791445732116699, "epoch": 1.85, "learning_rate": 4.0743871513102285e-05, "loss": 165.9034, "step": 2190, "task_loss": 3.6058197021484375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.494988635975199, "compression/movement_sparsity/importance_threshold": -1.5735830860307866e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9329121526537665, "compression/movement_sparsity/model_sparsity": 0.9008637502366381, "compression_loss": 159.892822265625, "distillation_loss": 6.742925643920898, "epoch": 1.85, "learning_rate": 4.0739644970414205e-05, "loss": 166.5036, "step": 2191, "task_loss": 3.7004892826080322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4950730953744102, "compression/movement_sparsity/importance_threshold": -1.5470625855448263e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9329478893841712, "compression/movement_sparsity/model_sparsity": 0.9008982593004143, "compression_loss": 159.9007110595703, "distillation_loss": 8.056020736694336, "epoch": 1.85, "learning_rate": 4.0735418427726125e-05, "loss": 166.5636, "step": 2192, "task_loss": 2.8520755767822266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.495156600440339, "compression/movement_sparsity/importance_threshold": -1.5208417485246088e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.933060441602486, "compression/movement_sparsity/model_sparsity": 0.901006945003779, "compression_loss": 159.9083251953125, "distillation_loss": 4.648472785949707, "epoch": 1.85, "learning_rate": 4.073119188503804e-05, "loss": 165.8134, "step": 2193, "task_loss": 2.1897342205047607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4952391565953338, "compression/movement_sparsity/importance_threshold": -1.4949188723367007e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9331152689252757, "compression/movement_sparsity/model_sparsity": 0.9010598888393623, "compression_loss": 159.9159393310547, "distillation_loss": 6.803522109985352, "epoch": 1.85, "learning_rate": 4.072696534234996e-05, "loss": 166.1004, "step": 2194, "task_loss": 2.787987470626831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4953207692617425, "compression/movement_sparsity/importance_threshold": -1.4692922543478419e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.933110737741574, "compression/movement_sparsity/model_sparsity": 0.9010555133157604, "compression_loss": 159.92356872558594, "distillation_loss": 6.17678165435791, "epoch": 1.85, "learning_rate": 4.072273879966188e-05, "loss": 165.7399, "step": 2195, "task_loss": 2.9803643226623535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4954014438619134, "compression/movement_sparsity/importance_threshold": -1.4439601919246856e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9331464148511406, "compression/movement_sparsity/model_sparsity": 0.9010899648068577, "compression_loss": 159.93101501464844, "distillation_loss": 4.351841449737549, "epoch": 1.86, "learning_rate": 4.07185122569738e-05, "loss": 165.9034, "step": 2196, "task_loss": 2.2298755645751953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.495481185818195, "compression/movement_sparsity/importance_threshold": -1.4189209824339716e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.933213893715792, "compression/movement_sparsity/model_sparsity": 0.901155125564919, "compression_loss": 159.93820190429688, "distillation_loss": 5.4615559577941895, "epoch": 1.86, "learning_rate": 4.0714285714285717e-05, "loss": 165.991, "step": 2197, "task_loss": 2.954812526702881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4955600005529348, "compression/movement_sparsity/importance_threshold": -1.3941729232421797e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9333205196227918, "compression/movement_sparsity/model_sparsity": 0.9012580885439937, "compression_loss": 159.94540405273438, "distillation_loss": 7.4501543045043945, "epoch": 1.86, "learning_rate": 4.0710059171597636e-05, "loss": 165.7139, "step": 2198, "task_loss": 3.646406412124634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4956378934884818, "compression/movement_sparsity/importance_threshold": -1.3697143117162233e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9334077210607129, "compression/movement_sparsity/model_sparsity": 0.9013422943442592, "compression_loss": 159.95249938964844, "distillation_loss": 7.007129669189453, "epoch": 1.86, "learning_rate": 4.070583262890955e-05, "loss": 165.9149, "step": 2199, "task_loss": 3.753594160079956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4957148700471836, "compression/movement_sparsity/importance_threshold": -1.3455434452226689e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9333678227958033, "compression/movement_sparsity/model_sparsity": 0.9013037667074907, "compression_loss": 159.95947265625, "distillation_loss": 5.433218955993652, "epoch": 1.86, "learning_rate": 4.070160608622147e-05, "loss": 166.3544, "step": 2200, "task_loss": 2.5983219146728516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.495790935651389, "compression/movement_sparsity/importance_threshold": -1.3216586211281696e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9334682719839679, "compression/movement_sparsity/model_sparsity": 0.9014007651570239, "compression_loss": 159.96640014648438, "distillation_loss": 6.349350452423096, "epoch": 1.86, "learning_rate": 4.0697379543533395e-05, "loss": 165.8351, "step": 2201, "task_loss": 3.0378222465515137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4958660957234455, "compression/movement_sparsity/importance_threshold": -1.2980581367993786e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9335358820144631, "compression/movement_sparsity/model_sparsity": 0.9014660525749789, "compression_loss": 159.97323608398438, "distillation_loss": 6.311150074005127, "epoch": 1.86, "learning_rate": 4.0693153000845315e-05, "loss": 166.0686, "step": 2202, "task_loss": 3.006481885910034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4959403556857018, "compression/movement_sparsity/importance_threshold": -1.2747402896029492e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.933615559302606, "compression/movement_sparsity/model_sparsity": 0.9015429927031581, "compression_loss": 159.9799346923828, "distillation_loss": 6.676658630371094, "epoch": 1.86, "learning_rate": 4.068892645815723e-05, "loss": 166.5747, "step": 2203, "task_loss": 3.5015902519226074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4960137209605058, "compression/movement_sparsity/importance_threshold": -1.2517033769057079e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9336323962273079, "compression/movement_sparsity/model_sparsity": 0.9015592512277, "compression_loss": 159.98660278320312, "distillation_loss": 6.951223850250244, "epoch": 1.86, "learning_rate": 4.068469991546915e-05, "loss": 166.7534, "step": 2204, "task_loss": 4.691531658172607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4960861969702057, "compression/movement_sparsity/importance_threshold": -1.2289456960742212e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9337604498635493, "compression/movement_sparsity/model_sparsity": 0.9016829058275976, "compression_loss": 159.99314880371094, "distillation_loss": 7.449137210845947, "epoch": 1.86, "learning_rate": 4.068047337278107e-05, "loss": 166.2661, "step": 2205, "task_loss": 3.614295721054077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.49615778913715, "compression/movement_sparsity/importance_threshold": -1.2064655444751424e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9339130315126178, "compression/movement_sparsity/model_sparsity": 0.9018302458276245, "compression_loss": 159.99954223632812, "distillation_loss": 7.655429840087891, "epoch": 1.86, "learning_rate": 4.067624683009299e-05, "loss": 166.3201, "step": 2206, "task_loss": 4.149497032165527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4962285028836866, "compression/movement_sparsity/importance_threshold": -1.1842612194751245e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9339630653200179, "compression/movement_sparsity/model_sparsity": 0.9018785608198183, "compression_loss": 160.0059051513672, "distillation_loss": 7.039043426513672, "epoch": 1.87, "learning_rate": 4.067202028740491e-05, "loss": 166.503, "step": 2207, "task_loss": 3.951432228088379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.496298343632164, "compression/movement_sparsity/importance_threshold": -1.1623310184409076e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.933984218793404, "compression/movement_sparsity/model_sparsity": 0.9018989876063179, "compression_loss": 160.0121612548828, "distillation_loss": 8.174517631530762, "epoch": 1.87, "learning_rate": 4.066779374471683e-05, "loss": 167.0821, "step": 2208, "task_loss": 3.1212151050567627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4963673168049298, "compression/movement_sparsity/importance_threshold": -1.1406732387391448e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9340117636206429, "compression/movement_sparsity/model_sparsity": 0.9019255861840033, "compression_loss": 160.01821899414062, "distillation_loss": 7.411257743835449, "epoch": 1.87, "learning_rate": 4.066356720202874e-05, "loss": 167.1383, "step": 2209, "task_loss": 4.18795919418335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4964354278243328, "compression/movement_sparsity/importance_threshold": -1.1192861777364893e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9341308264344869, "compression/movement_sparsity/model_sparsity": 0.9020405588239118, "compression_loss": 160.0242462158203, "distillation_loss": 7.036897659301758, "epoch": 1.87, "learning_rate": 4.065934065934066e-05, "loss": 166.6799, "step": 2210, "task_loss": 3.060356616973877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4965026821127212, "compression/movement_sparsity/importance_threshold": -1.0981681327995943e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9341863572831671, "compression/movement_sparsity/model_sparsity": 0.9020941820171069, "compression_loss": 160.0299835205078, "distillation_loss": 7.661181926727295, "epoch": 1.87, "learning_rate": 4.065511411665258e-05, "loss": 166.3157, "step": 2211, "task_loss": 2.883307933807373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4965690850924427, "compression/movement_sparsity/importance_threshold": -1.0773174012952864e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9342052332405347, "compression/movement_sparsity/model_sparsity": 0.9021124095272697, "compression_loss": 160.0356903076172, "distillation_loss": 6.834277629852295, "epoch": 1.87, "learning_rate": 4.06508875739645e-05, "loss": 166.9267, "step": 2212, "task_loss": 2.8401851654052734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4966346421858459, "compression/movement_sparsity/importance_threshold": -1.0567322805899587e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9342909083849984, "compression/movement_sparsity/model_sparsity": 0.9021951414669535, "compression_loss": 160.04135131835938, "distillation_loss": 6.103752613067627, "epoch": 1.87, "learning_rate": 4.064666103127642e-05, "loss": 165.3373, "step": 2213, "task_loss": 3.3360533714294434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4966993588152788, "compression/movement_sparsity/importance_threshold": -1.0364110680505245e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.9342650567895638, "compression/movement_sparsity/model_sparsity": 0.9021701779533509, "compression_loss": 160.04685974121094, "distillation_loss": 7.086657524108887, "epoch": 1.87, "learning_rate": 4.064243448858834e-05, "loss": 166.2703, "step": 2214, "task_loss": 3.6259615421295166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4967632404030895, "compression/movement_sparsity/importance_threshold": -1.0163520610435503e-05, "compression/movement_sparsity/linear_layer_sparsity": 0.934326382783715, "compression/movement_sparsity/model_sparsity": 0.9022293972109422, "compression_loss": 160.0523223876953, "distillation_loss": 7.847537040710449, "epoch": 1.87, "learning_rate": 4.063820794590025e-05, "loss": 166.6605, "step": 2215, "task_loss": 3.4278292655944824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4968262923716265, "compression/movement_sparsity/importance_threshold": -9.96553556935776e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9344232070249182, "compression/movement_sparsity/model_sparsity": 0.9023228952415939, "compression_loss": 160.05772399902344, "distillation_loss": 7.414052963256836, "epoch": 1.87, "learning_rate": 4.063398140321217e-05, "loss": 166.4403, "step": 2216, "task_loss": 3.3616738319396973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.496888520143238, "compression/movement_sparsity/importance_threshold": -9.77013853093768e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9345048756490563, "compression/movement_sparsity/model_sparsity": 0.9024017582972507, "compression_loss": 160.0629119873047, "distillation_loss": 6.021695137023926, "epoch": 1.87, "learning_rate": 4.062975486052409e-05, "loss": 165.4394, "step": 2217, "task_loss": 2.9772891998291016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4969499291402717, "compression/movement_sparsity/importance_threshold": -9.577312468842664e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9345621116537084, "compression/movement_sparsity/model_sparsity": 0.9024570280690645, "compression_loss": 160.068115234375, "distillation_loss": 5.793888568878174, "epoch": 1.87, "learning_rate": 4.062552831783602e-05, "loss": 166.2238, "step": 2218, "task_loss": 3.2094666957855225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4970105247850765, "compression/movement_sparsity/importance_threshold": -9.387040356739242e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9346653272487645, "compression/movement_sparsity/model_sparsity": 0.9025566978909021, "compression_loss": 160.07327270507812, "distillation_loss": 5.235283851623535, "epoch": 1.88, "learning_rate": 4.062130177514793e-05, "loss": 166.5598, "step": 2219, "task_loss": 3.1168527603149414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4970703125, "compression/movement_sparsity/importance_threshold": -9.199305168294813e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9347840204134118, "compression/movement_sparsity/model_sparsity": 0.9026713135802009, "compression_loss": 160.0782470703125, "distillation_loss": 5.164148330688477, "epoch": 1.88, "learning_rate": 4.061707523245985e-05, "loss": 165.6407, "step": 2220, "task_loss": 2.7088468074798584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4971292977073907, "compression/movement_sparsity/importance_threshold": -9.014089877175044e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9348311804964117, "compression/movement_sparsity/model_sparsity": 0.9027168535692683, "compression_loss": 160.08314514160156, "distillation_loss": 7.312452793121338, "epoch": 1.88, "learning_rate": 4.061284868977177e-05, "loss": 166.1839, "step": 2221, "task_loss": 3.081338405609131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4971874858295968, "compression/movement_sparsity/importance_threshold": -8.831377457047332e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9348431165882152, "compression/movement_sparsity/model_sparsity": 0.9027283796195986, "compression_loss": 160.08787536621094, "distillation_loss": 6.593239784240723, "epoch": 1.88, "learning_rate": 4.060862214708368e-05, "loss": 166.7792, "step": 2222, "task_loss": 4.395792484283447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4972448822889661, "compression/movement_sparsity/importance_threshold": -8.65115088157821e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9348386092528488, "compression/movement_sparsity/model_sparsity": 0.9027240271250683, "compression_loss": 160.092529296875, "distillation_loss": 5.105921745300293, "epoch": 1.88, "learning_rate": 4.060439560439561e-05, "loss": 165.7791, "step": 2223, "task_loss": 3.8668746948242188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4973014925078474, "compression/movement_sparsity/importance_threshold": -8.473393124434209e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9349293044718872, "compression/movement_sparsity/model_sparsity": 0.9028116066843216, "compression_loss": 160.0972442626953, "distillation_loss": 6.887876987457275, "epoch": 1.88, "learning_rate": 4.060016906170753e-05, "loss": 166.7268, "step": 2224, "task_loss": 4.878623008728027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4973573219085887, "compression/movement_sparsity/importance_threshold": -8.298087159281861e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9349935041904387, "compression/movement_sparsity/model_sparsity": 0.9028736009450394, "compression_loss": 160.10179138183594, "distillation_loss": 9.14314079284668, "epoch": 1.88, "learning_rate": 4.059594251901944e-05, "loss": 167.1431, "step": 2225, "task_loss": 4.028992652893066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4974123759135378, "compression/movement_sparsity/importance_threshold": -8.125215959789434e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9349920136694843, "compression/movement_sparsity/model_sparsity": 0.902872161628065, "compression_loss": 160.10635375976562, "distillation_loss": 5.251765251159668, "epoch": 1.88, "learning_rate": 4.059171597633136e-05, "loss": 165.9658, "step": 2226, "task_loss": 2.324880361557007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4974666599450435, "compression/movement_sparsity/importance_threshold": -7.954762499621723e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9350739923219807, "compression/movement_sparsity/model_sparsity": 0.9029513240616526, "compression_loss": 160.11093139648438, "distillation_loss": 7.090186595916748, "epoch": 1.88, "learning_rate": 4.058748943364328e-05, "loss": 165.9931, "step": 2227, "task_loss": 2.8340048789978027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4975201794254536, "compression/movement_sparsity/importance_threshold": -7.786709752446129e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9351306798149217, "compression/movement_sparsity/model_sparsity": 0.9030060641648198, "compression_loss": 160.11534118652344, "distillation_loss": 5.584734916687012, "epoch": 1.88, "learning_rate": 4.05832628909552e-05, "loss": 166.7349, "step": 2228, "task_loss": 3.7214515209198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4975729397771163, "compression/movement_sparsity/importance_threshold": -7.621040691929183e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9351840166167569, "compression/movement_sparsity/model_sparsity": 0.9030575686834288, "compression_loss": 160.1196746826172, "distillation_loss": 4.749370574951172, "epoch": 1.88, "learning_rate": 4.057903634826712e-05, "loss": 166.1906, "step": 2229, "task_loss": 2.4553816318511963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4976249464223799, "compression/movement_sparsity/importance_threshold": -7.457738291738285e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9352279094778245, "compression/movement_sparsity/model_sparsity": 0.9030999536896885, "compression_loss": 160.12380981445312, "distillation_loss": 7.9535441398620605, "epoch": 1.88, "learning_rate": 4.057480980557904e-05, "loss": 167.5517, "step": 2230, "task_loss": 3.4968388080596924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4976762047835925, "compression/movement_sparsity/importance_threshold": -7.296785525539966e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9352907259929302, "compression/movement_sparsity/model_sparsity": 0.9031606122642541, "compression_loss": 160.1279296875, "distillation_loss": 8.037897109985352, "epoch": 1.89, "learning_rate": 4.057058326289096e-05, "loss": 166.6816, "step": 2231, "task_loss": 3.4865970611572266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4977267202831026, "compression/movement_sparsity/importance_threshold": -7.138165366999891e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9353377072134155, "compression/movement_sparsity/model_sparsity": 0.9032059795352846, "compression_loss": 160.1321258544922, "distillation_loss": 6.906966686248779, "epoch": 1.89, "learning_rate": 4.0566356720202873e-05, "loss": 166.5066, "step": 2232, "task_loss": 2.8928592205047607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4977764983432582, "compression/movement_sparsity/importance_threshold": -6.981860789786326e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9353911274844242, "compression/movement_sparsity/model_sparsity": 0.9032575646556441, "compression_loss": 160.13607788085938, "distillation_loss": 6.219843864440918, "epoch": 1.89, "learning_rate": 4.056213017751479e-05, "loss": 167.1917, "step": 2233, "task_loss": 2.7306981086730957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4978255443864072, "compression/movement_sparsity/importance_threshold": -6.827854767565804e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9354974552872332, "compression/movement_sparsity/model_sparsity": 0.9033602397713241, "compression_loss": 160.13999938964844, "distillation_loss": 6.1700849533081055, "epoch": 1.89, "learning_rate": 4.055790363482671e-05, "loss": 166.186, "step": 2234, "task_loss": 2.377737522125244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4978738638348983, "compression/movement_sparsity/importance_threshold": -6.676130274003121e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9355415031624801, "compression/movement_sparsity/model_sparsity": 0.9034027744665492, "compression_loss": 160.1438751220703, "distillation_loss": 6.190776824951172, "epoch": 1.89, "learning_rate": 4.055367709213863e-05, "loss": 166.3818, "step": 2235, "task_loss": 2.9691388607025146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4979214621110792, "compression/movement_sparsity/importance_threshold": -6.526670282767412e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9356016009673648, "compression/movement_sparsity/model_sparsity": 0.9034608077269536, "compression_loss": 160.1475372314453, "distillation_loss": 6.897368431091309, "epoch": 1.89, "learning_rate": 4.054945054945055e-05, "loss": 166.3111, "step": 2236, "task_loss": 3.5071003437042236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4979683446372987, "compression/movement_sparsity/importance_threshold": -6.379457767524341e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9357159418108251, "compression/movement_sparsity/model_sparsity": 0.9035712206106875, "compression_loss": 160.1512451171875, "distillation_loss": 4.895448684692383, "epoch": 1.89, "learning_rate": 4.054522400676247e-05, "loss": 166.3977, "step": 2237, "task_loss": 2.651609182357788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4980145168359045, "compression/movement_sparsity/importance_threshold": -6.234475701939572e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9358109535785476, "compression/movement_sparsity/model_sparsity": 0.9036629684318983, "compression_loss": 160.15481567382812, "distillation_loss": 5.931543350219727, "epoch": 1.89, "learning_rate": 4.0540997464074385e-05, "loss": 166.6772, "step": 2238, "task_loss": 3.097256660461426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4980599841292448, "compression/movement_sparsity/importance_threshold": -6.09170705968224e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.935832619791142, "compression/movement_sparsity/model_sparsity": 0.903683890343437, "compression_loss": 160.15835571289062, "distillation_loss": 5.540727615356445, "epoch": 1.89, "learning_rate": 4.0536770921386305e-05, "loss": 166.4461, "step": 2239, "task_loss": 3.2605767250061035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498104751939668, "compression/movement_sparsity/importance_threshold": -5.951134814417142e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9358843945270169, "compression/movement_sparsity/model_sparsity": 0.9037338864578569, "compression_loss": 160.1618194580078, "distillation_loss": 6.151486873626709, "epoch": 1.89, "learning_rate": 4.053254437869823e-05, "loss": 167.0416, "step": 2240, "task_loss": 2.844891309738159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4981488256895221, "compression/movement_sparsity/importance_threshold": -5.812741939811676e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9359477834021692, "compression/movement_sparsity/model_sparsity": 0.9037950977301407, "compression_loss": 160.16519165039062, "distillation_loss": 7.432068347930908, "epoch": 1.89, "learning_rate": 4.0528317836010144e-05, "loss": 166.8899, "step": 2241, "task_loss": 3.0972211360931396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4981922108011556, "compression/movement_sparsity/importance_threshold": -5.676511409533243e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.936020520824748, "compression/movement_sparsity/model_sparsity": 0.9038653363984874, "compression_loss": 160.1685333251953, "distillation_loss": 7.028159141540527, "epoch": 1.89, "learning_rate": 4.0524091293322064e-05, "loss": 166.0256, "step": 2242, "task_loss": 3.8466532230377197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4982349126969166, "compression/movement_sparsity/importance_threshold": -5.542426197247506e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9360695649262343, "compression/movement_sparsity/model_sparsity": 0.9039126956842104, "compression_loss": 160.17178344726562, "distillation_loss": 8.252093315124512, "epoch": 1.9, "learning_rate": 4.0519864750633984e-05, "loss": 166.6953, "step": 2243, "task_loss": 3.964700222015381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498276936799153, "compression/movement_sparsity/importance_threshold": -5.410469276620998e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9360664050218107, "compression/movement_sparsity/model_sparsity": 0.9039096443322248, "compression_loss": 160.17498779296875, "distillation_loss": 6.128657341003418, "epoch": 1.9, "learning_rate": 4.05156382079459e-05, "loss": 167.3083, "step": 2244, "task_loss": 2.7759294509887695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4983182885302133, "compression/movement_sparsity/importance_threshold": -5.280623621321118e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9360977298101901, "compression/movement_sparsity/model_sparsity": 0.903939893017757, "compression_loss": 160.17808532714844, "distillation_loss": 7.01299524307251, "epoch": 1.9, "learning_rate": 4.051141166525782e-05, "loss": 166.3831, "step": 2245, "task_loss": 3.278843641281128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4983589733124458, "compression/movement_sparsity/importance_threshold": -5.152872205014397e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9361933735587974, "compression/movement_sparsity/model_sparsity": 0.904032251109365, "compression_loss": 160.18113708496094, "distillation_loss": 5.842001914978027, "epoch": 1.9, "learning_rate": 4.050718512256974e-05, "loss": 167.1998, "step": 2246, "task_loss": 3.185584545135498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498398996568198, "compression/movement_sparsity/importance_threshold": -5.027198001367368e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9362010646469225, "compression/movement_sparsity/model_sparsity": 0.9040396779849525, "compression_loss": 160.1841278076172, "distillation_loss": 4.510096073150635, "epoch": 1.9, "learning_rate": 4.050295857988166e-05, "loss": 165.4819, "step": 2247, "task_loss": 2.5500590801239014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498438363719819, "compression/movement_sparsity/importance_threshold": -4.90358398404743e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9362972569072411, "compression/movement_sparsity/model_sparsity": 0.9041325657452071, "compression_loss": 160.18699645996094, "distillation_loss": 6.285117149353027, "epoch": 1.9, "learning_rate": 4.0498732037193576e-05, "loss": 166.702, "step": 2248, "task_loss": 3.9513864517211914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4984770801896568, "compression/movement_sparsity/importance_threshold": -4.782013126721114e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9363535866751529, "compression/movement_sparsity/model_sparsity": 0.9041869604123005, "compression_loss": 160.18984985351562, "distillation_loss": 7.873725891113281, "epoch": 1.9, "learning_rate": 4.0494505494505496e-05, "loss": 166.643, "step": 2249, "task_loss": 3.337759256362915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498515151400059, "compression/movement_sparsity/importance_threshold": -4.662468403054086e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9363670252120785, "compression/movement_sparsity/model_sparsity": 0.9041999372941409, "compression_loss": 160.19252014160156, "distillation_loss": 7.402373790740967, "epoch": 1.9, "learning_rate": 4.0490278951817415e-05, "loss": 166.4185, "step": 2250, "task_loss": 4.273791313171387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4985525827733743, "compression/movement_sparsity/importance_threshold": -4.544932786713744e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9364439718658327, "compression/movement_sparsity/model_sparsity": 0.9042742405936232, "compression_loss": 160.1952667236328, "distillation_loss": 7.206536293029785, "epoch": 1.9, "learning_rate": 4.0486052409129335e-05, "loss": 166.3253, "step": 2251, "task_loss": 2.7881734371185303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4985893797319507, "compression/movement_sparsity/importance_threshold": -4.4293892513683555e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9365063591109035, "compression/movement_sparsity/model_sparsity": 0.9043344846449002, "compression_loss": 160.1979522705078, "distillation_loss": 7.284627914428711, "epoch": 1.9, "learning_rate": 4.0481825866441255e-05, "loss": 166.9501, "step": 2252, "task_loss": 2.7389354705810547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4986255476981365, "compression/movement_sparsity/importance_threshold": -4.31582077068185e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9365434074997482, "compression/movement_sparsity/model_sparsity": 0.9043702603076138, "compression_loss": 160.2005157470703, "distillation_loss": 6.506577014923096, "epoch": 1.9, "learning_rate": 4.0477599323753174e-05, "loss": 165.6199, "step": 2253, "task_loss": 3.5874550342559814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.49866109209428, "compression/movement_sparsity/importance_threshold": -4.204210318323361e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9366264474031644, "compression/movement_sparsity/model_sparsity": 0.904450447534887, "compression_loss": 160.2029571533203, "distillation_loss": 5.827661514282227, "epoch": 1.9, "learning_rate": 4.047337278106509e-05, "loss": 165.3543, "step": 2254, "task_loss": 2.079104423522949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4986960183427291, "compression/movement_sparsity/importance_threshold": -4.0945408679576856e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9366830275785964, "compression/movement_sparsity/model_sparsity": 0.9045050840072321, "compression_loss": 160.20545959472656, "distillation_loss": 8.142450332641602, "epoch": 1.91, "learning_rate": 4.046914623837701e-05, "loss": 167.1443, "step": 2255, "task_loss": 4.10180139541626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4987303318658323, "compression/movement_sparsity/importance_threshold": -3.986795393252224e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9367423503125849, "compression/movement_sparsity/model_sparsity": 0.9045623688228099, "compression_loss": 160.20797729492188, "distillation_loss": 5.617791175842285, "epoch": 1.91, "learning_rate": 4.046491969568893e-05, "loss": 166.0464, "step": 2256, "task_loss": 3.0598959922790527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4987640380859375, "compression/movement_sparsity/importance_threshold": -3.8809568678743744e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9367532132293012, "compression/movement_sparsity/model_sparsity": 0.9045728585649188, "compression_loss": 160.21031188964844, "distillation_loss": 6.853257179260254, "epoch": 1.91, "learning_rate": 4.046069315300085e-05, "loss": 166.9046, "step": 2257, "task_loss": 2.676332712173462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4987971424253932, "compression/movement_sparsity/importance_threshold": -3.777008265489802e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9368125836599601, "compression/movement_sparsity/model_sparsity": 0.9046301894386398, "compression_loss": 160.21267700195312, "distillation_loss": 6.59063720703125, "epoch": 1.91, "learning_rate": 4.0456466610312766e-05, "loss": 166.0765, "step": 2258, "task_loss": 2.8540420532226562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4988296503065472, "compression/movement_sparsity/importance_threshold": -3.6749325597667734e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9368201078097383, "compression/movement_sparsity/model_sparsity": 0.9046374551107261, "compression_loss": 160.2148895263672, "distillation_loss": 5.941023349761963, "epoch": 1.91, "learning_rate": 4.0452240067624686e-05, "loss": 165.4657, "step": 2259, "task_loss": 2.8525338172912598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4988615671517482, "compression/movement_sparsity/importance_threshold": -3.5747127243700855e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9368015538048969, "compression/movement_sparsity/model_sparsity": 0.9046195384930298, "compression_loss": 160.2171173095703, "distillation_loss": 6.166594505310059, "epoch": 1.91, "learning_rate": 4.0448013524936606e-05, "loss": 166.8199, "step": 2260, "task_loss": 3.9337363243103027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498892898383344, "compression/movement_sparsity/importance_threshold": -3.476331732968005e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9368916885880565, "compression/movement_sparsity/model_sparsity": 0.9047065768691007, "compression_loss": 160.21925354003906, "distillation_loss": 5.930685043334961, "epoch": 1.91, "learning_rate": 4.044378698224852e-05, "loss": 166.1881, "step": 2261, "task_loss": 3.537510871887207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.498923649423683, "compression/movement_sparsity/importance_threshold": -3.3797725592270636e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9368938349382309, "compression/movement_sparsity/model_sparsity": 0.9047086494855437, "compression_loss": 160.22125244140625, "distillation_loss": 6.61134672164917, "epoch": 1.91, "learning_rate": 4.0439560439560445e-05, "loss": 166.3565, "step": 2262, "task_loss": 3.2768218517303467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4989538256951132, "compression/movement_sparsity/importance_threshold": -3.285018176812926e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9369106122420946, "compression/movement_sparsity/model_sparsity": 0.9047248504374067, "compression_loss": 160.2233428955078, "distillation_loss": 7.624445915222168, "epoch": 1.91, "learning_rate": 4.0435333896872365e-05, "loss": 166.6674, "step": 2263, "task_loss": 3.4444010257720947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4989834326199831, "compression/movement_sparsity/importance_threshold": -3.1920515593929916e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.936960371793639, "compression/movement_sparsity/model_sparsity": 0.9047729005952774, "compression_loss": 160.22523498535156, "distillation_loss": 8.485793113708496, "epoch": 1.91, "learning_rate": 4.043110735418428e-05, "loss": 167.0411, "step": 2264, "task_loss": 4.363717555999756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4990124756206404, "compression/movement_sparsity/importance_threshold": -3.1008556806346593e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9369796054760356, "compression/movement_sparsity/model_sparsity": 0.9047914735415139, "compression_loss": 160.22708129882812, "distillation_loss": 5.798768043518066, "epoch": 1.91, "learning_rate": 4.04268808114962e-05, "loss": 166.468, "step": 2265, "task_loss": 3.9484541416168213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499040960119434, "compression/movement_sparsity/importance_threshold": -3.0114135142027265e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9370010332052773, "compression/movement_sparsity/model_sparsity": 0.9048121651623368, "compression_loss": 160.22891235351562, "distillation_loss": 6.1181488037109375, "epoch": 1.91, "learning_rate": 4.042265426880812e-05, "loss": 166.3589, "step": 2266, "task_loss": 3.781599521636963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4990688915387116, "compression/movement_sparsity/importance_threshold": -2.923708033766327e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9370265389998504, "compression/movement_sparsity/model_sparsity": 0.9048367947544013, "compression_loss": 160.23072814941406, "distillation_loss": 5.796014785766602, "epoch": 1.92, "learning_rate": 4.041842772612004e-05, "loss": 165.7493, "step": 2267, "task_loss": 1.9990835189819336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4990962753008215, "compression/movement_sparsity/importance_threshold": -2.8377222129902582e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9371044157386802, "compression/movement_sparsity/model_sparsity": 0.9049119961876754, "compression_loss": 160.2323455810547, "distillation_loss": 6.07520866394043, "epoch": 1.92, "learning_rate": 4.041420118343196e-05, "loss": 166.2371, "step": 2268, "task_loss": 2.0306954383850098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4991231168281116, "compression/movement_sparsity/importance_threshold": -2.7534390255427865e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9371220158101108, "compression/movement_sparsity/model_sparsity": 0.9049289916425081, "compression_loss": 160.23385620117188, "distillation_loss": 5.878981590270996, "epoch": 1.92, "learning_rate": 4.040997464074388e-05, "loss": 166.614, "step": 2269, "task_loss": 2.6395576000213623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4991494215429304, "compression/movement_sparsity/importance_threshold": -2.670841445088709e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9371451129228214, "compression/movement_sparsity/model_sparsity": 0.9049512952983422, "compression_loss": 160.2352752685547, "distillation_loss": 6.553890228271484, "epoch": 1.92, "learning_rate": 4.040574809805579e-05, "loss": 165.7844, "step": 2270, "task_loss": 3.7949657440185547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4991751948676264, "compression/movement_sparsity/importance_threshold": -2.5899124452962927e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.937151385034998, "compression/movement_sparsity/model_sparsity": 0.90495735194417, "compression_loss": 160.23672485351562, "distillation_loss": 6.890753269195557, "epoch": 1.92, "learning_rate": 4.040152155536771e-05, "loss": 165.6783, "step": 2271, "task_loss": 2.9423232078552246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4992004422245473, "compression/movement_sparsity/importance_threshold": -2.510634999832069e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9372227511782986, "compression/movement_sparsity/model_sparsity": 0.9050262664409005, "compression_loss": 160.2380828857422, "distillation_loss": 8.312797546386719, "epoch": 1.92, "learning_rate": 4.039729501267963e-05, "loss": 166.6976, "step": 2272, "task_loss": 3.2233598232269287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4992251690360416, "compression/movement_sparsity/importance_threshold": -2.43299208236257e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9372347945876107, "compression/movement_sparsity/model_sparsity": 0.9050378961220529, "compression_loss": 160.23939514160156, "distillation_loss": 8.048696517944336, "epoch": 1.92, "learning_rate": 4.039306846999155e-05, "loss": 166.8033, "step": 2273, "task_loss": 3.3677070140838623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4992493807244573, "compression/movement_sparsity/importance_threshold": -2.3569666665543276e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9372657497267934, "compression/movement_sparsity/model_sparsity": 0.9050677878569755, "compression_loss": 160.24057006835938, "distillation_loss": 7.093265533447266, "epoch": 1.92, "learning_rate": 4.038884192730347e-05, "loss": 167.0771, "step": 2274, "task_loss": 3.1502764225006104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4992730827121428, "compression/movement_sparsity/importance_threshold": -2.2825417260738737e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9373358042116542, "compression/movement_sparsity/model_sparsity": 0.9051354357547684, "compression_loss": 160.2418212890625, "distillation_loss": 7.007755279541016, "epoch": 1.92, "learning_rate": 4.038461538461539e-05, "loss": 166.8328, "step": 2275, "task_loss": 3.3897366523742676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4992962804214458, "compression/movement_sparsity/importance_threshold": -2.2097002345894748e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.937413907509669, "compression/movement_sparsity/model_sparsity": 0.9052108559642227, "compression_loss": 160.2429962158203, "distillation_loss": 6.155128002166748, "epoch": 1.92, "learning_rate": 4.038038884192731e-05, "loss": 166.252, "step": 2276, "task_loss": 3.0687942504882812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4993189792747152, "compression/movement_sparsity/importance_threshold": -2.1384251657650608e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9375080249648189, "compression/movement_sparsity/model_sparsity": 0.905301740195249, "compression_loss": 160.24404907226562, "distillation_loss": 6.239344596862793, "epoch": 1.92, "learning_rate": 4.037616229923922e-05, "loss": 166.59, "step": 2277, "task_loss": 2.195801258087158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4993411846942988, "compression/movement_sparsity/importance_threshold": -2.0686994932697655e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9375764100662106, "compression/movement_sparsity/model_sparsity": 0.9053677760580306, "compression_loss": 160.24514770507812, "distillation_loss": 6.321161270141602, "epoch": 1.93, "learning_rate": 4.037193575655114e-05, "loss": 166.8741, "step": 2278, "task_loss": 2.8800103664398193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4993629021025447, "compression/movement_sparsity/importance_threshold": -2.0005061907692537e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9376463333852273, "compression/movement_sparsity/model_sparsity": 0.9054352972959299, "compression_loss": 160.2462158203125, "distillation_loss": 6.892311096191406, "epoch": 1.93, "learning_rate": 4.036770921386307e-05, "loss": 166.2419, "step": 2279, "task_loss": 3.235488176345825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4993841369218013, "compression/movement_sparsity/importance_threshold": -1.9338282319309244e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.937731841591344, "compression/movement_sparsity/model_sparsity": 0.9055178680321125, "compression_loss": 160.2470245361328, "distillation_loss": 6.462249755859375, "epoch": 1.93, "learning_rate": 4.036348267117498e-05, "loss": 167.0156, "step": 2280, "task_loss": 2.772505760192871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4994048945744167, "compression/movement_sparsity/importance_threshold": -1.8686485904213096e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9378695538033707, "compression/movement_sparsity/model_sparsity": 0.9056508494060037, "compression_loss": 160.2480010986328, "distillation_loss": 5.56057596206665, "epoch": 1.93, "learning_rate": 4.03592561284869e-05, "loss": 166.0986, "step": 2281, "task_loss": 3.266045331954956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4994251804827392, "compression/movement_sparsity/importance_threshold": -1.8049502399060738e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9378897414191781, "compression/movement_sparsity/model_sparsity": 0.9056703435151038, "compression_loss": 160.24879455566406, "distillation_loss": 6.473270893096924, "epoch": 1.93, "learning_rate": 4.035502958579882e-05, "loss": 166.6945, "step": 2282, "task_loss": 2.9187378883361816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4994450000691169, "compression/movement_sparsity/importance_threshold": -1.7427161540526162e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9379320960626207, "compression/movement_sparsity/model_sparsity": 0.9057112431462461, "compression_loss": 160.2496337890625, "distillation_loss": 6.201051712036133, "epoch": 1.93, "learning_rate": 4.035080304311073e-05, "loss": 166.3391, "step": 2283, "task_loss": 3.883798837661743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499464358755898, "compression/movement_sparsity/importance_threshold": -1.6819293065283361e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9379536788060416, "compression/movement_sparsity/model_sparsity": 0.9057320844560341, "compression_loss": 160.25018310546875, "distillation_loss": 6.144804954528809, "epoch": 1.93, "learning_rate": 4.034657650042266e-05, "loss": 166.4444, "step": 2284, "task_loss": 2.8304660320281982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4994832619654308, "compression/movement_sparsity/importance_threshold": -1.622572670998898e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9380174373303907, "compression/movement_sparsity/model_sparsity": 0.9057936526789275, "compression_loss": 160.25088500976562, "distillation_loss": 6.457509517669678, "epoch": 1.93, "learning_rate": 4.034234995773458e-05, "loss": 166.3994, "step": 2285, "task_loss": 3.257936716079712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4995017151200631, "compression/movement_sparsity/importance_threshold": -1.5646292211325685e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9381504633445363, "compression/movement_sparsity/model_sparsity": 0.9059221088402515, "compression_loss": 160.2515106201172, "distillation_loss": 7.1787238121032715, "epoch": 1.93, "learning_rate": 4.033812341504649e-05, "loss": 167.0358, "step": 2286, "task_loss": 3.381146192550659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4995197236421438, "compression/movement_sparsity/importance_threshold": -1.5080819305941448e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9381933664996902, "compression/movement_sparsity/model_sparsity": 0.9059635381400403, "compression_loss": 160.25198364257812, "distillation_loss": 7.339473724365234, "epoch": 1.93, "learning_rate": 4.033389687235841e-05, "loss": 166.7057, "step": 2287, "task_loss": 3.6668620109558105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4995372929540205, "compression/movement_sparsity/importance_threshold": -1.452913773051026e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9383022937710438, "compression/movement_sparsity/model_sparsity": 0.9060687234245234, "compression_loss": 160.25244140625, "distillation_loss": 6.810704708099365, "epoch": 1.93, "learning_rate": 4.032967032967033e-05, "loss": 166.5957, "step": 2288, "task_loss": 2.7721328735351562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4995544284780418, "compression/movement_sparsity/importance_threshold": -1.3991077221706116e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9383476652288982, "compression/movement_sparsity/model_sparsity": 0.9061125362332216, "compression_loss": 160.25282287597656, "distillation_loss": 6.215928077697754, "epoch": 1.93, "learning_rate": 4.032544378698225e-05, "loss": 166.6523, "step": 2289, "task_loss": 2.639528512954712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4995711356365553, "compression/movement_sparsity/importance_threshold": -1.3466467516194333e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9383736479901768, "compression/movement_sparsity/model_sparsity": 0.9061376264067179, "compression_loss": 160.25323486328125, "distillation_loss": 6.250784873962402, "epoch": 1.94, "learning_rate": 4.032121724429417e-05, "loss": 166.8524, "step": 2290, "task_loss": 2.1177682876586914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4995874198519097, "compression/movement_sparsity/importance_threshold": -1.2955138350631556e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9384423669682622, "compression/movement_sparsity/model_sparsity": 0.9062039846765019, "compression_loss": 160.2536163330078, "distillation_loss": 5.532809257507324, "epoch": 1.94, "learning_rate": 4.031699070160609e-05, "loss": 165.9842, "step": 2291, "task_loss": 4.4165496826171875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4996032865464533, "compression/movement_sparsity/importance_threshold": -1.2456919461691779e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9385329906422948, "compression/movement_sparsity/model_sparsity": 0.9062914951485403, "compression_loss": 160.25393676757812, "distillation_loss": 6.652081489562988, "epoch": 1.94, "learning_rate": 4.031276415891801e-05, "loss": 166.7904, "step": 2292, "task_loss": 4.348559379577637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499618741142534, "compression/movement_sparsity/importance_threshold": -1.1971640586057666e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9386001356302524, "compression/movement_sparsity/model_sparsity": 0.9063563334995994, "compression_loss": 160.25424194335938, "distillation_loss": 6.511976718902588, "epoch": 1.94, "learning_rate": 4.030853761622992e-05, "loss": 166.1173, "step": 2293, "task_loss": 3.1712534427642822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4996337890625, "compression/movement_sparsity/importance_threshold": -1.1499131460368517e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9385707783295328, "compression/movement_sparsity/model_sparsity": 0.9063279847124732, "compression_loss": 160.25454711914062, "distillation_loss": 7.2438225746154785, "epoch": 1.94, "learning_rate": 4.030431107354184e-05, "loss": 166.7937, "step": 2294, "task_loss": 3.1915442943573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4996484357286994, "compression/movement_sparsity/importance_threshold": -1.1039221821306996e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9386646096046595, "compression/movement_sparsity/model_sparsity": 0.9064185925946405, "compression_loss": 160.2546844482422, "distillation_loss": 5.350149154663086, "epoch": 1.94, "learning_rate": 4.030008453085376e-05, "loss": 166.2937, "step": 2295, "task_loss": 2.411289691925049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499662686563481, "compression/movement_sparsity/importance_threshold": -1.0591741405538424e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9386838552112238, "compression/movement_sparsity/model_sparsity": 0.9064371770554128, "compression_loss": 160.25479125976562, "distillation_loss": 4.8888044357299805, "epoch": 1.94, "learning_rate": 4.029585798816568e-05, "loss": 165.8791, "step": 2296, "task_loss": 3.549776554107666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4996765469891922, "compression/movement_sparsity/importance_threshold": -1.0156519949736792e-06, "compression/movement_sparsity/linear_layer_sparsity": 0.9387504636116377, "compression/movement_sparsity/model_sparsity": 0.9065014972523612, "compression_loss": 160.25489807128906, "distillation_loss": 5.807749271392822, "epoch": 1.94, "learning_rate": 4.02916314454776e-05, "loss": 166.0598, "step": 2297, "task_loss": 3.165010452270508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4996900224281817, "compression/movement_sparsity/importance_threshold": -9.733387190558745e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9388193972247406, "compression/movement_sparsity/model_sparsity": 0.9065680627837894, "compression_loss": 160.2548828125, "distillation_loss": 7.553060531616211, "epoch": 1.94, "learning_rate": 4.028740490278952e-05, "loss": 167.4046, "step": 2298, "task_loss": 4.012598514556885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4997031183027976, "compression/movement_sparsity/importance_threshold": -9.322172864669603e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9388033353709351, "compression/movement_sparsity/model_sparsity": 0.9065525527040742, "compression_loss": 160.2548065185547, "distillation_loss": 5.544658184051514, "epoch": 1.94, "learning_rate": 4.0283178360101435e-05, "loss": 166.6705, "step": 2299, "task_loss": 2.5536773204803467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4997158400353878, "compression/movement_sparsity/importance_threshold": -8.922706708752032e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9388191706655555, "compression/movement_sparsity/model_sparsity": 0.9065678440076093, "compression_loss": 160.25482177734375, "distillation_loss": 6.991927623748779, "epoch": 1.94, "learning_rate": 4.0278951817413355e-05, "loss": 167.0597, "step": 2300, "task_loss": 3.9764349460601807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499728193048301, "compression/movement_sparsity/importance_threshold": -8.534818459454002e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9389276090460361, "compression/movement_sparsity/model_sparsity": 0.9066725571961248, "compression_loss": 160.25469970703125, "distillation_loss": 9.4048433303833, "epoch": 1.94, "learning_rate": 4.027472527472528e-05, "loss": 167.3636, "step": 2301, "task_loss": 3.833726406097412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499740182763885, "compression/movement_sparsity/importance_threshold": -8.158337853458181e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9389631549897586, "compression/movement_sparsity/model_sparsity": 0.9067068820273284, "compression_loss": 160.2545928955078, "distillation_loss": 5.585280895233154, "epoch": 1.95, "learning_rate": 4.0270498732037194e-05, "loss": 165.9951, "step": 2302, "task_loss": 2.1264007091522217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4997518146044881, "compression/movement_sparsity/importance_threshold": -7.793094627421213e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9390370013599275, "compression/movement_sparsity/model_sparsity": 0.9067781915475039, "compression_loss": 160.25433349609375, "distillation_loss": 5.731496810913086, "epoch": 1.95, "learning_rate": 4.0266272189349114e-05, "loss": 166.0074, "step": 2303, "task_loss": 3.065538167953491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4997630939924584, "compression/movement_sparsity/importance_threshold": -7.438918518025764e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.939011376323678, "compression/movement_sparsity/model_sparsity": 0.9067534468100815, "compression_loss": 160.25421142578125, "distillation_loss": 4.953388690948486, "epoch": 1.95, "learning_rate": 4.0262045646661034e-05, "loss": 166.1986, "step": 2304, "task_loss": 3.1932075023651123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4997740263501445, "compression/movement_sparsity/importance_threshold": -7.095639261919806e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9389827583213519, "compression/movement_sparsity/model_sparsity": 0.9067258119241746, "compression_loss": 160.25387573242188, "distillation_loss": 5.227053642272949, "epoch": 1.95, "learning_rate": 4.0257819103972953e-05, "loss": 165.0266, "step": 2305, "task_loss": 2.610239028930664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499784617099894, "compression/movement_sparsity/importance_threshold": -6.763086595777332e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9390099334993941, "compression/movement_sparsity/model_sparsity": 0.9067520535512503, "compression_loss": 160.2535858154297, "distillation_loss": 8.27192497253418, "epoch": 1.95, "learning_rate": 4.0253592561284866e-05, "loss": 166.5678, "step": 2306, "task_loss": 3.0572874546051025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4997948716640557, "compression/movement_sparsity/importance_threshold": -6.44109025626366e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9390175768908486, "compression/movement_sparsity/model_sparsity": 0.9067594343686947, "compression_loss": 160.25323486328125, "distillation_loss": 6.735799789428711, "epoch": 1.95, "learning_rate": 4.024936601859679e-05, "loss": 166.4424, "step": 2307, "task_loss": 3.5349838733673096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998047954649776, "compression/movement_sparsity/importance_threshold": -6.129479980052782e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9390480669874935, "compression/movement_sparsity/model_sparsity": 0.9067888770367213, "compression_loss": 160.2529296875, "distillation_loss": 7.471296310424805, "epoch": 1.95, "learning_rate": 4.024513947590871e-05, "loss": 166.3025, "step": 2308, "task_loss": 3.3933815956115723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998143939250075, "compression/movement_sparsity/importance_threshold": -5.828085503818692e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9391163566955442, "compression/movement_sparsity/model_sparsity": 0.9068548207832167, "compression_loss": 160.25267028808594, "distillation_loss": 7.8446125984191895, "epoch": 1.95, "learning_rate": 4.0240912933220626e-05, "loss": 166.5962, "step": 2309, "task_loss": 3.4806113243103027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499823672466494, "compression/movement_sparsity/importance_threshold": -5.53673656420936e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9391452131812229, "compression/movement_sparsity/model_sparsity": 0.9068826859598395, "compression_loss": 160.252197265625, "distillation_loss": 5.635642051696777, "epoch": 1.95, "learning_rate": 4.0236686390532545e-05, "loss": 166.0683, "step": 2310, "task_loss": 3.0456361770629883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499832636511785, "compression/movement_sparsity/importance_threshold": -5.255262897907453e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9392000047315097, "compression/movement_sparsity/model_sparsity": 0.9069355952518153, "compression_loss": 160.2517852783203, "distillation_loss": 8.03728199005127, "epoch": 1.95, "learning_rate": 4.0232459847844465e-05, "loss": 166.6682, "step": 2311, "task_loss": 3.352954626083374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499841291483229, "compression/movement_sparsity/importance_threshold": -4.983494241569617e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9392522325857547, "compression/movement_sparsity/model_sparsity": 0.9069860289185955, "compression_loss": 160.2512664794922, "distillation_loss": 8.243897438049316, "epoch": 1.95, "learning_rate": 4.0228233305156385e-05, "loss": 166.4311, "step": 2312, "task_loss": 4.332007884979248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499849642803174, "compression/movement_sparsity/importance_threshold": -4.7212603318611684e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9393562113275395, "compression/movement_sparsity/model_sparsity": 0.9070864356707239, "compression_loss": 160.2508087158203, "distillation_loss": 6.26651668548584, "epoch": 1.95, "learning_rate": 4.0224006762468305e-05, "loss": 166.9993, "step": 2313, "task_loss": 3.6233510971069336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998576958939684, "compression/movement_sparsity/importance_threshold": -4.468390905464775e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9394542637580092, "compression/movement_sparsity/model_sparsity": 0.9071811196985623, "compression_loss": 160.2501983642578, "distillation_loss": 6.953649044036865, "epoch": 1.96, "learning_rate": 4.0219780219780224e-05, "loss": 166.9118, "step": 2314, "task_loss": 3.4671061038970947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998654561779603, "compression/movement_sparsity/importance_threshold": -4.224715699037082e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.939546616436349, "compression/movement_sparsity/model_sparsity": 0.9072702997782911, "compression_loss": 160.24966430664062, "distillation_loss": 7.9676690101623535, "epoch": 1.96, "learning_rate": 4.021555367709214e-05, "loss": 166.4533, "step": 2315, "task_loss": 3.0737318992614746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499872929077498, "compression/movement_sparsity/importance_threshold": -3.990064449243408e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9395063008255722, "compression/movement_sparsity/model_sparsity": 0.9072313691327697, "compression_loss": 160.2492218017578, "distillation_loss": 6.623746871948242, "epoch": 1.96, "learning_rate": 4.021132713440406e-05, "loss": 166.2768, "step": 2316, "task_loss": 3.9193062782287598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998801200149292, "compression/movement_sparsity/importance_threshold": -3.764266892757745e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9395257372188186, "compression/movement_sparsity/model_sparsity": 0.9072501378261149, "compression_loss": 160.24867248535156, "distillation_loss": 5.222832202911377, "epoch": 1.96, "learning_rate": 4.020710059171598e-05, "loss": 166.5427, "step": 2317, "task_loss": 2.873854875564575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998870344126027, "compression/movement_sparsity/importance_threshold": -3.5471527662367386e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.939577070760491, "compression/movement_sparsity/model_sparsity": 0.9072997079027103, "compression_loss": 160.24815368652344, "distillation_loss": 7.107813835144043, "epoch": 1.96, "learning_rate": 4.0202874049027896e-05, "loss": 166.4333, "step": 2318, "task_loss": 3.1081695556640625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4998936776928664, "compression/movement_sparsity/importance_threshold": -3.338551806363055e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9396959785601559, "compression/movement_sparsity/model_sparsity": 0.9074145308536535, "compression_loss": 160.24745178222656, "distillation_loss": 6.21928596496582, "epoch": 1.96, "learning_rate": 4.0198647506339816e-05, "loss": 166.0708, "step": 2319, "task_loss": 2.2859959602355957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999000552780686, "compression/movement_sparsity/importance_threshold": -3.138293749793339e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.939757209160966, "compression/movement_sparsity/model_sparsity": 0.9074736579949585, "compression_loss": 160.2467803955078, "distillation_loss": 7.165808200836182, "epoch": 1.96, "learning_rate": 4.0194420963651736e-05, "loss": 167.3753, "step": 2320, "task_loss": 4.1401214599609375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999061725905571, "compression/movement_sparsity/importance_threshold": -2.9462083331929095e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9397843724148405, "compression/movement_sparsity/model_sparsity": 0.9074998881074984, "compression_loss": 160.2461700439453, "distillation_loss": 6.096712112426758, "epoch": 1.96, "learning_rate": 4.0190194420963656e-05, "loss": 166.5108, "step": 2321, "task_loss": 3.754952907562256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499912035052681, "compression/movement_sparsity/importance_threshold": -2.7621252932357593e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9397642086473682, "compression/movement_sparsity/model_sparsity": 0.9074804170274698, "compression_loss": 160.2455291748047, "distillation_loss": 6.692794322967529, "epoch": 1.96, "learning_rate": 4.018596787827557e-05, "loss": 166.4531, "step": 2322, "task_loss": 3.667102098464966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999176480867873, "compression/movement_sparsity/importance_threshold": -2.585874366587207e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.939793196298891, "compression/movement_sparsity/model_sparsity": 0.9075084088639864, "compression_loss": 160.24473571777344, "distillation_loss": 6.191159725189209, "epoch": 1.96, "learning_rate": 4.018174133558749e-05, "loss": 166.1407, "step": 2323, "task_loss": 2.7325778007507324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999230171152251, "compression/movement_sparsity/importance_threshold": -2.4172852899125713e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9398451022006099, "compression/movement_sparsity/model_sparsity": 0.9075585316383, "compression_loss": 160.2440185546875, "distillation_loss": 7.136834621429443, "epoch": 1.96, "learning_rate": 4.0177514792899415e-05, "loss": 166.5416, "step": 2324, "task_loss": 3.2274491786956787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999281475603423, "compression/movement_sparsity/importance_threshold": -2.2561877998858448e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9399011219401632, "compression/movement_sparsity/model_sparsity": 0.9076126269274628, "compression_loss": 160.2433624267578, "distillation_loss": 6.396122455596924, "epoch": 1.96, "learning_rate": 4.017328825021133e-05, "loss": 167.1078, "step": 2325, "task_loss": 3.020205497741699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999330448444872, "compression/movement_sparsity/importance_threshold": -2.1024116331636727e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9399558061729413, "compression/movement_sparsity/model_sparsity": 0.9076654325886165, "compression_loss": 160.2424774169922, "distillation_loss": 6.5041184425354, "epoch": 1.97, "learning_rate": 4.016906170752325e-05, "loss": 166.9327, "step": 2326, "task_loss": 3.6526072025299072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499937714390008, "compression/movement_sparsity/importance_threshold": -1.9557865264200475e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9400235712176158, "compression/movement_sparsity/model_sparsity": 0.907730869695537, "compression_loss": 160.24172973632812, "distillation_loss": 6.3362016677856445, "epoch": 1.97, "learning_rate": 4.016483516483517e-05, "loss": 166.1995, "step": 2327, "task_loss": 3.876760721206665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999421616192525, "compression/movement_sparsity/importance_threshold": -1.8161422163116142e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9400382975646462, "compression/movement_sparsity/model_sparsity": 0.9077450901472431, "compression_loss": 160.24099731445312, "distillation_loss": 5.346047878265381, "epoch": 1.97, "learning_rate": 4.016060862214708e-05, "loss": 165.7456, "step": 2328, "task_loss": 1.9838918447494507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999463919545695, "compression/movement_sparsity/importance_threshold": -1.683308439521039e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9401107845797045, "compression/movement_sparsity/model_sparsity": 0.9078150870103382, "compression_loss": 160.2401580810547, "distillation_loss": 5.978574752807617, "epoch": 1.97, "learning_rate": 4.015638207945901e-05, "loss": 166.1061, "step": 2329, "task_loss": 3.0958638191223145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999504108183066, "compression/movement_sparsity/importance_threshold": -1.5571149327136408e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9401909030620499, "compression/movement_sparsity/model_sparsity": 0.9078924531763417, "compression_loss": 160.2393035888672, "distillation_loss": 6.485856056213379, "epoch": 1.97, "learning_rate": 4.0152155536770927e-05, "loss": 167.1001, "step": 2330, "task_loss": 3.1669461727142334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999542236328125, "compression/movement_sparsity/importance_threshold": -1.4373914325460646e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9402461000340363, "compression/movement_sparsity/model_sparsity": 0.9079457539625346, "compression_loss": 160.23838806152344, "distillation_loss": 6.434739112854004, "epoch": 1.97, "learning_rate": 4.014792899408284e-05, "loss": 166.3486, "step": 2331, "task_loss": 3.4682846069335938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999578358204353, "compression/movement_sparsity/importance_threshold": -1.323967675692303e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9402534095487971, "compression/movement_sparsity/model_sparsity": 0.9079528123729766, "compression_loss": 160.23739624023438, "distillation_loss": 5.510326862335205, "epoch": 1.97, "learning_rate": 4.014370245139476e-05, "loss": 165.9706, "step": 2332, "task_loss": 2.847395420074463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999612528035227, "compression/movement_sparsity/importance_threshold": -1.2166733988176748e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9402802150776426, "compression/movement_sparsity/model_sparsity": 0.9079786970494428, "compression_loss": 160.23663330078125, "distillation_loss": 5.170238494873047, "epoch": 1.97, "learning_rate": 4.013947590870668e-05, "loss": 166.3467, "step": 2333, "task_loss": 3.0937933921813965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999644800044236, "compression/movement_sparsity/importance_threshold": -1.1153383385961724e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9403256342321675, "compression/movement_sparsity/model_sparsity": 0.9080225559162842, "compression_loss": 160.23556518554688, "distillation_loss": 7.272273063659668, "epoch": 1.97, "learning_rate": 4.01352493660186e-05, "loss": 166.3301, "step": 2334, "task_loss": 3.949800491333008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999675228454856, "compression/movement_sparsity/importance_threshold": -1.019792231684441e-07, "compression/movement_sparsity/linear_layer_sparsity": 0.9403562077979859, "compression/movement_sparsity/model_sparsity": 0.9080520791860615, "compression_loss": 160.23471069335938, "distillation_loss": 4.398719310760498, "epoch": 1.97, "learning_rate": 4.013102282333052e-05, "loss": 165.8517, "step": 2335, "task_loss": 2.015611171722412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999703867490572, "compression/movement_sparsity/importance_threshold": -9.298648147564731e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.940412132144198, "compression/movement_sparsity/model_sparsity": 0.9081060823589379, "compression_loss": 160.23365783691406, "distillation_loss": 6.767613410949707, "epoch": 1.97, "learning_rate": 4.012679628064244e-05, "loss": 166.5842, "step": 2336, "task_loss": 3.376575469970703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999730771374868, "compression/movement_sparsity/importance_threshold": -8.453858244689139e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9404556672802367, "compression/movement_sparsity/model_sparsity": 0.9081481219291236, "compression_loss": 160.23275756835938, "distillation_loss": 6.922449111938477, "epoch": 1.97, "learning_rate": 4.012256973795436e-05, "loss": 166.3696, "step": 2337, "task_loss": 2.766040802001953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499975599433122, "compression/movement_sparsity/importance_threshold": -7.661849975044294e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9405130582990681, "compression/movement_sparsity/model_sparsity": 0.9082035413899028, "compression_loss": 160.23179626464844, "distillation_loss": 5.656912803649902, "epoch": 1.98, "learning_rate": 4.011834319526627e-05, "loss": 166.3511, "step": 2338, "task_loss": 4.997627258300781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999779590583118, "compression/movement_sparsity/importance_threshold": -6.920920705283384e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.940491415934809, "compression/movement_sparsity/model_sparsity": 0.9081826425074357, "compression_loss": 160.23074340820312, "distillation_loss": 5.92845344543457, "epoch": 1.98, "learning_rate": 4.011411665257819e-05, "loss": 165.1377, "step": 2339, "task_loss": 2.1684250831604004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999801614354036, "compression/movement_sparsity/importance_threshold": -6.229367801972863e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.940503530889127, "compression/movement_sparsity/model_sparsity": 0.908194341275803, "compression_loss": 160.2298583984375, "distillation_loss": 7.2031168937683105, "epoch": 1.98, "learning_rate": 4.010989010989011e-05, "loss": 166.8086, "step": 2340, "task_loss": 3.2679431438446045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499982211986746, "compression/movement_sparsity/importance_threshold": -5.585488631765917e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9404995243688014, "compression/movement_sparsity/model_sparsity": 0.908190472391776, "compression_loss": 160.22882080078125, "distillation_loss": 7.116390228271484, "epoch": 1.98, "learning_rate": 4.010566356720203e-05, "loss": 166.1407, "step": 2341, "task_loss": 3.019827127456665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999841161346872, "compression/movement_sparsity/importance_threshold": -4.987580561575944e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9405863800058609, "compression/movement_sparsity/model_sparsity": 0.9082743442705035, "compression_loss": 160.22769165039062, "distillation_loss": 7.2136454582214355, "epoch": 1.98, "learning_rate": 4.010143702451395e-05, "loss": 165.974, "step": 2342, "task_loss": 3.615882158279419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999858793015752, "compression/movement_sparsity/importance_threshold": -4.433940957795923e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9406937929079249, "compression/movement_sparsity/model_sparsity": 0.9083780672089407, "compression_loss": 160.22671508789062, "distillation_loss": 7.135324954986572, "epoch": 1.98, "learning_rate": 4.009721048182587e-05, "loss": 167.1272, "step": 2343, "task_loss": 3.465183973312378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999875069097586, "compression/movement_sparsity/importance_threshold": -3.9228671872525156e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9407205030434291, "compression/movement_sparsity/model_sparsity": 0.9084038597691204, "compression_loss": 160.2256317138672, "distillation_loss": 6.006848335266113, "epoch": 1.98, "learning_rate": 4.009298393913778e-05, "loss": 165.7744, "step": 2344, "task_loss": 2.4421539306640625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999890043815851, "compression/movement_sparsity/importance_threshold": -3.452656616598909e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9407593996782574, "compression/movement_sparsity/model_sparsity": 0.9084414201848823, "compression_loss": 160.2245635986328, "distillation_loss": 6.364957809448242, "epoch": 1.98, "learning_rate": 4.00887573964497e-05, "loss": 166.7501, "step": 2345, "task_loss": 3.831197500228882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999903771394032, "compression/movement_sparsity/importance_threshold": -3.021606612401556e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9407746864611665, "compression/movement_sparsity/model_sparsity": 0.9084561818197708, "compression_loss": 160.22341918945312, "distillation_loss": 4.808963298797607, "epoch": 1.98, "learning_rate": 4.008453085376163e-05, "loss": 165.3034, "step": 2346, "task_loss": 2.1402463912963867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999916306055607, "compression/movement_sparsity/importance_threshold": -2.628014541487117e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9408673014711942, "compression/movement_sparsity/model_sparsity": 0.908545615219287, "compression_loss": 160.22210693359375, "distillation_loss": 5.781253814697266, "epoch": 1.98, "learning_rate": 4.008030431107354e-05, "loss": 165.6374, "step": 2347, "task_loss": 3.0369365215301514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999927702024065, "compression/movement_sparsity/importance_threshold": -2.2701777704220438e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9408660255852572, "compression/movement_sparsity/model_sparsity": 0.908544383163957, "compression_loss": 160.22091674804688, "distillation_loss": 5.894574165344238, "epoch": 1.98, "learning_rate": 4.007607776838546e-05, "loss": 166.107, "step": 2348, "task_loss": 3.9779584407806396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999938013522884, "compression/movement_sparsity/importance_threshold": -1.946393665859525e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9409916228429658, "compression/movement_sparsity/model_sparsity": 0.9086656657694809, "compression_loss": 160.2196807861328, "distillation_loss": 8.923492431640625, "epoch": 1.99, "learning_rate": 4.007185122569738e-05, "loss": 166.1568, "step": 2349, "task_loss": 3.444336414337158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999947294775544, "compression/movement_sparsity/importance_threshold": -1.6549595946262208e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.9410088532651996, "compression/movement_sparsity/model_sparsity": 0.908682304273704, "compression_loss": 160.21849060058594, "distillation_loss": 5.9663190841674805, "epoch": 1.99, "learning_rate": 4.00676246830093e-05, "loss": 166.3487, "step": 2350, "task_loss": 3.7416346073150635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999955600005528, "compression/movement_sparsity/importance_threshold": -1.3941729232885836e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.941048167245895, "compression/movement_sparsity/model_sparsity": 0.9087202676982186, "compression_loss": 160.21734619140625, "distillation_loss": 6.85717248916626, "epoch": 1.99, "learning_rate": 4.006339814032122e-05, "loss": 167.0489, "step": 2351, "task_loss": 3.1543943881988525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999962983436324, "compression/movement_sparsity/importance_threshold": -1.1623310184130653e-08, "compression/movement_sparsity/linear_layer_sparsity": 0.941123682999533, "compression/movement_sparsity/model_sparsity": 0.9087931892534055, "compression_loss": 160.21609497070312, "distillation_loss": 5.350762367248535, "epoch": 1.99, "learning_rate": 4.005917159763314e-05, "loss": 165.9253, "step": 2352, "task_loss": 3.19876766204834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999969499291403, "compression/movement_sparsity/importance_threshold": -9.577312469130628e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9411966112087939, "compression/movement_sparsity/model_sparsity": 0.9088636121543249, "compression_loss": 160.21475219726562, "distillation_loss": 7.298005104064941, "epoch": 1.99, "learning_rate": 4.005494505494506e-05, "loss": 166.4483, "step": 2353, "task_loss": 3.664341688156128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999975201794253, "compression/movement_sparsity/importance_threshold": -7.786709752682919e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9412855416510222, "compression/movement_sparsity/model_sparsity": 0.9089494875622806, "compression_loss": 160.21347045898438, "distillation_loss": 7.00773286819458, "epoch": 1.99, "learning_rate": 4.005071851225697e-05, "loss": 166.6199, "step": 2354, "task_loss": 2.653808116912842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499998014516836, "compression/movement_sparsity/importance_threshold": -6.23447570218677e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9413031417224527, "compression/movement_sparsity/model_sparsity": 0.9089664830171132, "compression_loss": 160.21218872070312, "distillation_loss": 6.630448818206787, "epoch": 1.99, "learning_rate": 4.004649196956889e-05, "loss": 166.6982, "step": 2355, "task_loss": 3.0347328186035156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.49999843836372, "compression/movement_sparsity/importance_threshold": -4.903583984174065e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.941277588231209, "compression/movement_sparsity/model_sparsity": 0.9089418073669056, "compression_loss": 160.21099853515625, "distillation_loss": 6.061424732208252, "epoch": 1.99, "learning_rate": 4.004226542688081e-05, "loss": 166.0519, "step": 2356, "task_loss": 2.741102933883667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999987971424256, "compression/movement_sparsity/importance_threshold": -3.7770082651766845e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9412779101837352, "compression/movement_sparsity/model_sparsity": 0.908942118259372, "compression_loss": 160.2096710205078, "distillation_loss": 7.33541202545166, "epoch": 1.99, "learning_rate": 4.003803888419273e-05, "loss": 166.7486, "step": 2357, "task_loss": 3.0860352516174316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999990962753007, "compression/movement_sparsity/importance_threshold": -2.8377222134612357e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9413075178919751, "compression/movement_sparsity/model_sparsity": 0.9089707088517499, "compression_loss": 160.2085418701172, "distillation_loss": 6.780506610870361, "epoch": 1.99, "learning_rate": 4.003381234150465e-05, "loss": 166.1792, "step": 2358, "task_loss": 3.4217426776885986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999993411846944, "compression/movement_sparsity/importance_threshold": -2.0686994929575153e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9413916667429814, "compression/movement_sparsity/model_sparsity": 0.909051966930852, "compression_loss": 160.20724487304688, "distillation_loss": 7.029179573059082, "epoch": 1.99, "learning_rate": 4.002958579881657e-05, "loss": 166.5161, "step": 2359, "task_loss": 3.1922600269317627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499999537292954, "compression/movement_sparsity/importance_threshold": -1.4529137727994912e-09, "compression/movement_sparsity/linear_layer_sparsity": 0.9414161232108025, "compression/movement_sparsity/model_sparsity": 0.9090755832437666, "compression_loss": 160.20599365234375, "distillation_loss": 8.545100212097168, "epoch": 1.99, "learning_rate": 4.0025359256128485e-05, "loss": 166.6691, "step": 2360, "task_loss": 3.9525198936462402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999996900224282, "compression/movement_sparsity/importance_threshold": -9.73338718651684e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9414943457504937, "compression/movement_sparsity/model_sparsity": 0.9091511185985788, "compression_loss": 160.20477294921875, "distillation_loss": 5.64668083190918, "epoch": 2.0, "learning_rate": 4.0021132713440405e-05, "loss": 166.3668, "step": 2361, "task_loss": 2.5068724155426025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499999804795465, "compression/movement_sparsity/importance_threshold": -6.129479979133379e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9415185398866269, "compression/movement_sparsity/model_sparsity": 0.9091744815917059, "compression_loss": 160.2035369873047, "distillation_loss": 8.312286376953125, "epoch": 2.0, "learning_rate": 4.0016906170752324e-05, "loss": 167.4499, "step": 2362, "task_loss": 3.8813138008117676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999998870344127, "compression/movement_sparsity/importance_threshold": -3.547152762489736e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9415324673144256, "compression/movement_sparsity/model_sparsity": 0.9091879305695139, "compression_loss": 160.20236206054688, "distillation_loss": 5.018465995788574, "epoch": 2.0, "learning_rate": 4.001267962806425e-05, "loss": 166.0319, "step": 2363, "task_loss": 2.1185457706451416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999999421616192, "compression/movement_sparsity/importance_threshold": -1.8161422192519705e-10, "compression/movement_sparsity/linear_layer_sparsity": 0.9415084043441364, "compression/movement_sparsity/model_sparsity": 0.9091646942362805, "compression_loss": 160.201171875, "distillation_loss": 5.742884635925293, "epoch": 2.0, "learning_rate": 4.0008453085376164e-05, "loss": 165.8484, "step": 2364, "task_loss": 3.038019895553589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.499999975599433, "compression/movement_sparsity/importance_threshold": -7.661849973916723e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.9414946438546846, "compression/movement_sparsity/model_sparsity": 0.9091514064619736, "compression_loss": 160.199951171875, "distillation_loss": 6.367249488830566, "epoch": 2.0, "learning_rate": 4.0004226542688084e-05, "loss": 166.3348, "step": 2365, "task_loss": 2.932345390319824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.4999999927702024, "compression/movement_sparsity/importance_threshold": -2.2701777957490066e-11, "compression/movement_sparsity/linear_layer_sparsity": 0.9415118027319127, "compression/movement_sparsity/model_sparsity": 0.9091679758789819, "compression_loss": 160.19873046875, "distillation_loss": 7.022960186004639, "epoch": 2.0, "learning_rate": 4e-05, "loss": 165.5297, "step": 2366, "task_loss": 2.767848491668701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 6.727951526641846, "epoch": 2.0, "learning_rate": 3.9995773457311916e-05, "loss": 131.5092, "step": 2367, "task_loss": 3.372413158416748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 5.7696685791015625, "epoch": 2.0, "learning_rate": 3.999154691462384e-05, "loss": 5.3427, "step": 2368, "task_loss": 2.0392656326293945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 5.515368938446045, "epoch": 2.0, "learning_rate": 3.998732037193576e-05, "loss": 5.4626, "step": 2369, "task_loss": 2.3784356117248535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 5.478196144104004, "epoch": 2.0, "learning_rate": 3.9983093829247675e-05, "loss": 4.8103, "step": 2370, "task_loss": 2.407582998275757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 4.114841461181641, "epoch": 2.0, "learning_rate": 3.9978867286559595e-05, "loss": 3.4251, "step": 2371, "task_loss": 2.1522226333618164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 4.007223606109619, "epoch": 2.01, "learning_rate": 3.9974640743871515e-05, "loss": 3.8089, "step": 2372, "task_loss": 2.4861936569213867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 4.57686710357666, "epoch": 2.01, "learning_rate": 3.9970414201183435e-05, "loss": 3.7541, "step": 2373, "task_loss": 1.73525071144104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 3.09799861907959, "epoch": 2.01, "learning_rate": 3.9966187658495354e-05, "loss": 3.3156, "step": 2374, "task_loss": 1.3507612943649292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.0171287059783936, "epoch": 2.01, "learning_rate": 3.9961961115807274e-05, "loss": 3.0909, "step": 2375, "task_loss": 1.0064871311187744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 3.3522822856903076, "epoch": 2.01, "learning_rate": 3.995773457311919e-05, "loss": 2.4917, "step": 2376, "task_loss": 1.5581567287445068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.805544853210449, "epoch": 2.01, "learning_rate": 3.995350803043111e-05, "loss": 3.1072, "step": 2377, "task_loss": 1.9875231981277466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 3.0497374534606934, "epoch": 2.01, "learning_rate": 3.9949281487743027e-05, "loss": 2.807, "step": 2378, "task_loss": 0.9842453598976135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 3.2810075283050537, "epoch": 2.01, "learning_rate": 3.9945054945054946e-05, "loss": 3.0397, "step": 2379, "task_loss": 1.8561878204345703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 3.6260640621185303, "epoch": 2.01, "learning_rate": 3.9940828402366866e-05, "loss": 2.7026, "step": 2380, "task_loss": 1.3060163259506226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.07041335105896, "epoch": 2.01, "learning_rate": 3.9936601859678786e-05, "loss": 2.7868, "step": 2381, "task_loss": 1.4494351148605347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.1373894214630127, "epoch": 2.01, "learning_rate": 3.9932375316990706e-05, "loss": 2.22, "step": 2382, "task_loss": 1.4277929067611694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 3.55580997467041, "epoch": 2.01, "learning_rate": 3.992814877430262e-05, "loss": 2.364, "step": 2383, "task_loss": 1.6325865983963013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.385978937149048, "epoch": 2.02, "learning_rate": 3.992392223161454e-05, "loss": 1.911, "step": 2384, "task_loss": 1.5452477931976318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 3.232536792755127, "epoch": 2.02, "learning_rate": 3.9919695688926465e-05, "loss": 2.6697, "step": 2385, "task_loss": 1.540758728981018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.6594595909118652, "epoch": 2.02, "learning_rate": 3.991546914623838e-05, "loss": 2.5036, "step": 2386, "task_loss": 1.640219807624817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7822601795196533, "epoch": 2.02, "learning_rate": 3.99112426035503e-05, "loss": 2.1458, "step": 2387, "task_loss": 1.0330513715744019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.7828869819641113, "epoch": 2.02, "learning_rate": 3.990701606086222e-05, "loss": 2.0729, "step": 2388, "task_loss": 0.884263277053833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.0850987434387207, "epoch": 2.02, "learning_rate": 3.990278951817413e-05, "loss": 2.4954, "step": 2389, "task_loss": 1.7540827989578247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5429188013076782, "epoch": 2.02, "learning_rate": 3.989856297548606e-05, "loss": 1.9325, "step": 2390, "task_loss": 1.5614439249038696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.464088797569275, "epoch": 2.02, "learning_rate": 3.9894336432797976e-05, "loss": 1.6323, "step": 2391, "task_loss": 1.041124939918518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.936899185180664, "epoch": 2.02, "learning_rate": 3.9890109890109896e-05, "loss": 2.2428, "step": 2392, "task_loss": 1.192954421043396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7787879705429077, "epoch": 2.02, "learning_rate": 3.988588334742181e-05, "loss": 1.5976, "step": 2393, "task_loss": 2.2010862827301025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.483182668685913, "epoch": 2.02, "learning_rate": 3.988165680473373e-05, "loss": 2.2957, "step": 2394, "task_loss": 1.8986337184906006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 3.556488513946533, "epoch": 2.02, "learning_rate": 3.987743026204565e-05, "loss": 2.3439, "step": 2395, "task_loss": 1.7637701034545898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.783862590789795, "epoch": 2.03, "learning_rate": 3.987320371935757e-05, "loss": 1.8387, "step": 2396, "task_loss": 1.8849520683288574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7669353485107422, "epoch": 2.03, "learning_rate": 3.986897717666949e-05, "loss": 1.7569, "step": 2397, "task_loss": 1.0393316745758057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2654008865356445, "epoch": 2.03, "learning_rate": 3.986475063398141e-05, "loss": 1.622, "step": 2398, "task_loss": 1.1282707452774048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6770771741867065, "epoch": 2.03, "learning_rate": 3.986052409129332e-05, "loss": 1.7897, "step": 2399, "task_loss": 2.0706143379211426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.143314838409424, "epoch": 2.03, "learning_rate": 3.985629754860524e-05, "loss": 2.0636, "step": 2400, "task_loss": 1.0722198486328125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.695676326751709, "epoch": 2.03, "learning_rate": 3.985207100591716e-05, "loss": 1.7997, "step": 2401, "task_loss": 1.2164547443389893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.676777720451355, "epoch": 2.03, "learning_rate": 3.984784446322908e-05, "loss": 1.6675, "step": 2402, "task_loss": 1.9163891077041626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.76197350025177, "epoch": 2.03, "learning_rate": 3.9843617920541e-05, "loss": 1.7632, "step": 2403, "task_loss": 1.0540944337844849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4015312194824219, "epoch": 2.03, "learning_rate": 3.983939137785292e-05, "loss": 1.664, "step": 2404, "task_loss": 2.305607557296753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7427690029144287, "epoch": 2.03, "learning_rate": 3.983516483516483e-05, "loss": 1.801, "step": 2405, "task_loss": 0.9954270124435425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2429957389831543, "epoch": 2.03, "learning_rate": 3.983093829247675e-05, "loss": 1.3147, "step": 2406, "task_loss": 0.6306697726249695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5663480758666992, "epoch": 2.03, "learning_rate": 3.982671174978868e-05, "loss": 1.7001, "step": 2407, "task_loss": 0.8829794526100159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5441477298736572, "epoch": 2.04, "learning_rate": 3.98224852071006e-05, "loss": 1.5441, "step": 2408, "task_loss": 1.432923674583435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4914753437042236, "epoch": 2.04, "learning_rate": 3.981825866441251e-05, "loss": 1.574, "step": 2409, "task_loss": 1.3204939365386963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2094545364379883, "epoch": 2.04, "learning_rate": 3.981403212172443e-05, "loss": 1.7408, "step": 2410, "task_loss": 0.5181313157081604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5648179054260254, "epoch": 2.04, "learning_rate": 3.980980557903635e-05, "loss": 1.5979, "step": 2411, "task_loss": 1.1175904273986816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3891541957855225, "epoch": 2.04, "learning_rate": 3.980557903634827e-05, "loss": 1.5099, "step": 2412, "task_loss": 1.4110456705093384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3231825828552246, "epoch": 2.04, "learning_rate": 3.980135249366019e-05, "loss": 1.7478, "step": 2413, "task_loss": 0.7278585433959961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2702136039733887, "epoch": 2.04, "learning_rate": 3.979712595097211e-05, "loss": 0.9821, "step": 2414, "task_loss": 0.8774385452270508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.9826806783676147, "epoch": 2.04, "learning_rate": 3.979289940828402e-05, "loss": 1.8279, "step": 2415, "task_loss": 1.3961982727050781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2732384204864502, "epoch": 2.04, "learning_rate": 3.978867286559594e-05, "loss": 1.5267, "step": 2416, "task_loss": 1.3630156517028809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1873409748077393, "epoch": 2.04, "learning_rate": 3.978444632290786e-05, "loss": 1.355, "step": 2417, "task_loss": 0.8526145815849304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3411285877227783, "epoch": 2.04, "learning_rate": 3.978021978021978e-05, "loss": 1.4467, "step": 2418, "task_loss": 0.6982748508453369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.341094732284546, "epoch": 2.04, "learning_rate": 3.97759932375317e-05, "loss": 1.7472, "step": 2419, "task_loss": 0.5854294896125793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3771424293518066, "epoch": 2.05, "learning_rate": 3.977176669484362e-05, "loss": 1.4416, "step": 2420, "task_loss": 0.8746435642242432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.424363374710083, "epoch": 2.05, "learning_rate": 3.976754015215554e-05, "loss": 1.7909, "step": 2421, "task_loss": 0.8313206434249878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.655148506164551, "epoch": 2.05, "learning_rate": 3.9763313609467454e-05, "loss": 1.826, "step": 2422, "task_loss": 2.3594305515289307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2628995180130005, "epoch": 2.05, "learning_rate": 3.9759087066779374e-05, "loss": 1.6797, "step": 2423, "task_loss": 0.7806808948516846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3123974800109863, "epoch": 2.05, "learning_rate": 3.97548605240913e-05, "loss": 1.5577, "step": 2424, "task_loss": 0.5960263609886169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.8879499435424805, "epoch": 2.05, "learning_rate": 3.9750633981403214e-05, "loss": 1.6582, "step": 2425, "task_loss": 1.5674095153808594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.2396693229675293, "epoch": 2.05, "learning_rate": 3.974640743871513e-05, "loss": 1.823, "step": 2426, "task_loss": 2.3232245445251465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.502490758895874, "epoch": 2.05, "learning_rate": 3.974218089602705e-05, "loss": 1.2049, "step": 2427, "task_loss": 1.333996295928955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2393107414245605, "epoch": 2.05, "learning_rate": 3.9737954353338966e-05, "loss": 1.4193, "step": 2428, "task_loss": 1.0256779193878174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.13899564743042, "epoch": 2.05, "learning_rate": 3.973372781065089e-05, "loss": 1.5823, "step": 2429, "task_loss": 0.773370623588562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5242952108383179, "epoch": 2.05, "learning_rate": 3.972950126796281e-05, "loss": 1.5033, "step": 2430, "task_loss": 1.3092904090881348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3226540088653564, "epoch": 2.05, "learning_rate": 3.9725274725274725e-05, "loss": 1.6954, "step": 2431, "task_loss": 0.9623933434486389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.5996503829956055, "epoch": 2.06, "learning_rate": 3.9721048182586645e-05, "loss": 1.8684, "step": 2432, "task_loss": 1.4465861320495605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6925464868545532, "epoch": 2.06, "learning_rate": 3.9716821639898565e-05, "loss": 1.1295, "step": 2433, "task_loss": 1.0429103374481201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8419778347015381, "epoch": 2.06, "learning_rate": 3.971259509721048e-05, "loss": 1.2644, "step": 2434, "task_loss": 0.8587064146995544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.551980972290039, "epoch": 2.06, "learning_rate": 3.9708368554522404e-05, "loss": 1.4314, "step": 2435, "task_loss": 1.6084686517715454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6862212419509888, "epoch": 2.06, "learning_rate": 3.9704142011834324e-05, "loss": 1.652, "step": 2436, "task_loss": 1.7060869932174683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3340411186218262, "epoch": 2.06, "learning_rate": 3.9699915469146244e-05, "loss": 1.4734, "step": 2437, "task_loss": 0.697928249835968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0594673156738281, "epoch": 2.06, "learning_rate": 3.969568892645816e-05, "loss": 1.5607, "step": 2438, "task_loss": 1.1826533079147339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7003694772720337, "epoch": 2.06, "learning_rate": 3.9691462383770076e-05, "loss": 1.8563, "step": 2439, "task_loss": 1.1646924018859863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4151952266693115, "epoch": 2.06, "learning_rate": 3.9687235841081996e-05, "loss": 1.4569, "step": 2440, "task_loss": 1.145049810409546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6371992826461792, "epoch": 2.06, "learning_rate": 3.9683009298393916e-05, "loss": 1.3912, "step": 2441, "task_loss": 0.9268471598625183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.064062237739563, "epoch": 2.06, "learning_rate": 3.9678782755705836e-05, "loss": 1.2124, "step": 2442, "task_loss": 0.448896199464798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.528700828552246, "epoch": 2.07, "learning_rate": 3.9674556213017755e-05, "loss": 1.6382, "step": 2443, "task_loss": 2.4253177642822266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.474086046218872, "epoch": 2.07, "learning_rate": 3.967032967032967e-05, "loss": 1.2923, "step": 2444, "task_loss": 1.7279953956604004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.8257012367248535, "epoch": 2.07, "learning_rate": 3.966610312764159e-05, "loss": 1.5224, "step": 2445, "task_loss": 1.5101815462112427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.758450984954834, "epoch": 2.07, "learning_rate": 3.9661876584953515e-05, "loss": 1.4327, "step": 2446, "task_loss": 1.8750847578048706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.162940502166748, "epoch": 2.07, "learning_rate": 3.965765004226543e-05, "loss": 1.3452, "step": 2447, "task_loss": 0.647018313407898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.1899538040161133, "epoch": 2.07, "learning_rate": 3.965342349957735e-05, "loss": 1.5344, "step": 2448, "task_loss": 1.8895554542541504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.339575171470642, "epoch": 2.07, "learning_rate": 3.964919695688927e-05, "loss": 1.0786, "step": 2449, "task_loss": 0.9051080942153931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.655377984046936, "epoch": 2.07, "learning_rate": 3.964497041420119e-05, "loss": 1.0924, "step": 2450, "task_loss": 0.20561100542545319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7912007570266724, "epoch": 2.07, "learning_rate": 3.96407438715131e-05, "loss": 1.1015, "step": 2451, "task_loss": 0.369274765253067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7987916469573975, "epoch": 2.07, "learning_rate": 3.9636517328825026e-05, "loss": 1.3633, "step": 2452, "task_loss": 2.451631546020508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5714248418807983, "epoch": 2.07, "learning_rate": 3.9632290786136946e-05, "loss": 1.3145, "step": 2453, "task_loss": 1.3857307434082031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6587679386138916, "epoch": 2.07, "learning_rate": 3.962806424344886e-05, "loss": 1.5088, "step": 2454, "task_loss": 0.6644378900527954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5347239971160889, "epoch": 2.08, "learning_rate": 3.962383770076078e-05, "loss": 1.4524, "step": 2455, "task_loss": 1.3674412965774536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7437262535095215, "epoch": 2.08, "learning_rate": 3.96196111580727e-05, "loss": 1.3974, "step": 2456, "task_loss": 1.4345266819000244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4868624210357666, "epoch": 2.08, "learning_rate": 3.961538461538462e-05, "loss": 1.1858, "step": 2457, "task_loss": 1.7337347269058228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6236227750778198, "epoch": 2.08, "learning_rate": 3.961115807269654e-05, "loss": 1.1912, "step": 2458, "task_loss": 1.0803594589233398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4364078044891357, "epoch": 2.08, "learning_rate": 3.960693153000846e-05, "loss": 1.3049, "step": 2459, "task_loss": 1.3383210897445679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.143260955810547, "epoch": 2.08, "learning_rate": 3.960270498732037e-05, "loss": 1.5919, "step": 2460, "task_loss": 1.4985262155532837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5759220123291016, "epoch": 2.08, "learning_rate": 3.959847844463229e-05, "loss": 1.627, "step": 2461, "task_loss": 1.5590928792953491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4170928001403809, "epoch": 2.08, "learning_rate": 3.959425190194421e-05, "loss": 1.1669, "step": 2462, "task_loss": 0.2683714032173157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9110627174377441, "epoch": 2.08, "learning_rate": 3.959002535925613e-05, "loss": 1.6216, "step": 2463, "task_loss": 0.4389840066432953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7188645601272583, "epoch": 2.08, "learning_rate": 3.958579881656805e-05, "loss": 1.2045, "step": 2464, "task_loss": 1.0582202672958374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7483444213867188, "epoch": 2.08, "learning_rate": 3.958157227387997e-05, "loss": 1.6748, "step": 2465, "task_loss": 1.5068576335906982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5472084283828735, "epoch": 2.08, "learning_rate": 3.957734573119189e-05, "loss": 1.1954, "step": 2466, "task_loss": 1.5050468444824219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.366323709487915, "epoch": 2.09, "learning_rate": 3.95731191885038e-05, "loss": 1.2196, "step": 2467, "task_loss": 1.2034974098205566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.377177357673645, "epoch": 2.09, "learning_rate": 3.956889264581572e-05, "loss": 1.176, "step": 2468, "task_loss": 0.8319675326347351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0718408823013306, "epoch": 2.09, "learning_rate": 3.956466610312765e-05, "loss": 1.0282, "step": 2469, "task_loss": 1.5994333028793335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6089887619018555, "epoch": 2.09, "learning_rate": 3.956043956043956e-05, "loss": 1.2418, "step": 2470, "task_loss": 1.8430551290512085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9888797402381897, "epoch": 2.09, "learning_rate": 3.955621301775148e-05, "loss": 1.237, "step": 2471, "task_loss": 1.0910674333572388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0491304397583008, "epoch": 2.09, "learning_rate": 3.95519864750634e-05, "loss": 1.6628, "step": 2472, "task_loss": 1.254642128944397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3919920921325684, "epoch": 2.09, "learning_rate": 3.9547759932375314e-05, "loss": 1.405, "step": 2473, "task_loss": 1.1151994466781616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.9127373695373535, "epoch": 2.09, "learning_rate": 3.954353338968724e-05, "loss": 1.4587, "step": 2474, "task_loss": 1.230209231376648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8920097947120667, "epoch": 2.09, "learning_rate": 3.953930684699916e-05, "loss": 1.2473, "step": 2475, "task_loss": 0.7124837636947632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8591822385787964, "epoch": 2.09, "learning_rate": 3.953508030431107e-05, "loss": 1.2194, "step": 2476, "task_loss": 0.4764343500137329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.038776159286499, "epoch": 2.09, "learning_rate": 3.953085376162299e-05, "loss": 1.3827, "step": 2477, "task_loss": 1.8782124519348145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1613956689834595, "epoch": 2.09, "learning_rate": 3.952662721893491e-05, "loss": 1.3358, "step": 2478, "task_loss": 0.9608829617500305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5416390895843506, "epoch": 2.1, "learning_rate": 3.952240067624683e-05, "loss": 1.0554, "step": 2479, "task_loss": 0.8468848466873169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4891023635864258, "epoch": 2.1, "learning_rate": 3.951817413355875e-05, "loss": 1.4654, "step": 2480, "task_loss": 0.847949743270874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.405877709388733, "epoch": 2.1, "learning_rate": 3.951394759087067e-05, "loss": 1.4796, "step": 2481, "task_loss": 1.156759262084961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9040069580078125, "epoch": 2.1, "learning_rate": 3.950972104818259e-05, "loss": 1.0208, "step": 2482, "task_loss": 0.6528639793395996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6594023704528809, "epoch": 2.1, "learning_rate": 3.9505494505494504e-05, "loss": 1.2315, "step": 2483, "task_loss": 0.9274123907089233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0847699642181396, "epoch": 2.1, "learning_rate": 3.9501267962806424e-05, "loss": 0.9808, "step": 2484, "task_loss": 1.0958430767059326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1887664794921875, "epoch": 2.1, "learning_rate": 3.9497041420118344e-05, "loss": 1.4002, "step": 2485, "task_loss": 0.9094762206077576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7207974791526794, "epoch": 2.1, "learning_rate": 3.9492814877430263e-05, "loss": 1.5657, "step": 2486, "task_loss": 1.386243224143982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.3925623893737793, "epoch": 2.1, "learning_rate": 3.948858833474218e-05, "loss": 1.6115, "step": 2487, "task_loss": 2.2891972064971924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 3.0303375720977783, "epoch": 2.1, "learning_rate": 3.94843617920541e-05, "loss": 1.9384, "step": 2488, "task_loss": 3.155574321746826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.9076354503631592, "epoch": 2.1, "learning_rate": 3.9480135249366016e-05, "loss": 1.5362, "step": 2489, "task_loss": 2.11348295211792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4189292192459106, "epoch": 2.1, "learning_rate": 3.9475908706677936e-05, "loss": 1.2295, "step": 2490, "task_loss": 0.961495041847229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0058842897415161, "epoch": 2.11, "learning_rate": 3.947168216398986e-05, "loss": 1.2178, "step": 2491, "task_loss": 1.0458797216415405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9723291397094727, "epoch": 2.11, "learning_rate": 3.9467455621301775e-05, "loss": 0.9829, "step": 2492, "task_loss": 1.7455593347549438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0847389698028564, "epoch": 2.11, "learning_rate": 3.9463229078613695e-05, "loss": 1.5082, "step": 2493, "task_loss": 1.2623860836029053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0085761547088623, "epoch": 2.11, "learning_rate": 3.9459002535925615e-05, "loss": 1.0233, "step": 2494, "task_loss": 0.835718035697937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9100750088691711, "epoch": 2.11, "learning_rate": 3.9454775993237534e-05, "loss": 1.0876, "step": 2495, "task_loss": 0.506028950214386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7700991630554199, "epoch": 2.11, "learning_rate": 3.9450549450549454e-05, "loss": 1.1632, "step": 2496, "task_loss": 0.43916288018226624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.519012212753296, "epoch": 2.11, "learning_rate": 3.9446322907861374e-05, "loss": 1.1096, "step": 2497, "task_loss": 1.6551190614700317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8177735805511475, "epoch": 2.11, "learning_rate": 3.9442096365173294e-05, "loss": 1.0053, "step": 2498, "task_loss": 1.852524995803833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.9728920459747314, "epoch": 2.11, "learning_rate": 3.9437869822485207e-05, "loss": 1.2923, "step": 2499, "task_loss": 0.9941496849060059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2772374153137207, "epoch": 2.11, "learning_rate": 3.9433643279797126e-05, "loss": 1.061, "step": 2500, "task_loss": 0.5361504554748535 }, { "epoch": 2.11, "eval_accuracy": 0.855049504950495, "eval_loss": 0.752260684967041, "eval_runtime": 228.5962, "eval_samples_per_second": 110.457, "eval_steps_per_second": 0.866, "step": 2500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5476107597351074, "epoch": 2.11, "learning_rate": 3.9429416737109046e-05, "loss": 0.8758, "step": 2501, "task_loss": 0.9209589958190918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5106786489486694, "epoch": 2.11, "learning_rate": 3.9425190194420966e-05, "loss": 1.3833, "step": 2502, "task_loss": 0.9346505403518677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4420406818389893, "epoch": 2.12, "learning_rate": 3.9420963651732885e-05, "loss": 1.3425, "step": 2503, "task_loss": 0.6034866571426392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0130925178527832, "epoch": 2.12, "learning_rate": 3.9416737109044805e-05, "loss": 1.4086, "step": 2504, "task_loss": 0.5354030728340149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9626778364181519, "epoch": 2.12, "learning_rate": 3.941251056635672e-05, "loss": 1.0621, "step": 2505, "task_loss": 1.0288535356521606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.411634087562561, "epoch": 2.12, "learning_rate": 3.940828402366864e-05, "loss": 1.4817, "step": 2506, "task_loss": 1.0778255462646484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5689705610275269, "epoch": 2.12, "learning_rate": 3.940405748098056e-05, "loss": 1.2598, "step": 2507, "task_loss": 0.6316152811050415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.121328592300415, "epoch": 2.12, "learning_rate": 3.9399830938292484e-05, "loss": 1.124, "step": 2508, "task_loss": 1.2931735515594482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6839990615844727, "epoch": 2.12, "learning_rate": 3.93956043956044e-05, "loss": 1.3684, "step": 2509, "task_loss": 1.3924341201782227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.210131287574768, "epoch": 2.12, "learning_rate": 3.939137785291632e-05, "loss": 1.0921, "step": 2510, "task_loss": 0.4956583082675934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.299082636833191, "epoch": 2.12, "learning_rate": 3.938715131022824e-05, "loss": 1.5506, "step": 2511, "task_loss": 1.6400787830352783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9118422269821167, "epoch": 2.12, "learning_rate": 3.938292476754015e-05, "loss": 1.3016, "step": 2512, "task_loss": 0.7700437903404236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.201402187347412, "epoch": 2.12, "learning_rate": 3.9378698224852076e-05, "loss": 0.9514, "step": 2513, "task_loss": 0.7935715317726135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6507834196090698, "epoch": 2.13, "learning_rate": 3.9374471682163996e-05, "loss": 1.3384, "step": 2514, "task_loss": 1.067337989807129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.206497311592102, "epoch": 2.13, "learning_rate": 3.937024513947591e-05, "loss": 1.1819, "step": 2515, "task_loss": 1.0327556133270264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6909717321395874, "epoch": 2.13, "learning_rate": 3.936601859678783e-05, "loss": 1.2015, "step": 2516, "task_loss": 2.1916956901550293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1243207454681396, "epoch": 2.13, "learning_rate": 3.936179205409975e-05, "loss": 0.8962, "step": 2517, "task_loss": 0.9569413661956787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1356823444366455, "epoch": 2.13, "learning_rate": 3.935756551141167e-05, "loss": 1.1087, "step": 2518, "task_loss": 0.3992585241794586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7975798845291138, "epoch": 2.13, "learning_rate": 3.935333896872359e-05, "loss": 1.2118, "step": 2519, "task_loss": 1.6944652795791626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.9308773279190063, "epoch": 2.13, "learning_rate": 3.934911242603551e-05, "loss": 1.3052, "step": 2520, "task_loss": 1.083774209022522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2401868104934692, "epoch": 2.13, "learning_rate": 3.934488588334742e-05, "loss": 1.3036, "step": 2521, "task_loss": 1.7014265060424805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6791520714759827, "epoch": 2.13, "learning_rate": 3.934065934065934e-05, "loss": 1.2344, "step": 2522, "task_loss": 1.699588418006897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1963629722595215, "epoch": 2.13, "learning_rate": 3.933643279797126e-05, "loss": 1.1654, "step": 2523, "task_loss": 0.5291019082069397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.035501480102539, "epoch": 2.13, "learning_rate": 3.933220625528318e-05, "loss": 1.2282, "step": 2524, "task_loss": 1.068690538406372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.8149365186691284, "epoch": 2.13, "learning_rate": 3.93279797125951e-05, "loss": 1.2286, "step": 2525, "task_loss": 1.7638338804244995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9684966206550598, "epoch": 2.14, "learning_rate": 3.932375316990702e-05, "loss": 1.0901, "step": 2526, "task_loss": 0.9050475358963013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2119160890579224, "epoch": 2.14, "learning_rate": 3.931952662721894e-05, "loss": 1.0866, "step": 2527, "task_loss": 0.8774319291114807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1510636806488037, "epoch": 2.14, "learning_rate": 3.931530008453085e-05, "loss": 0.9694, "step": 2528, "task_loss": 1.1399471759796143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4225149154663086, "epoch": 2.14, "learning_rate": 3.931107354184277e-05, "loss": 1.1414, "step": 2529, "task_loss": 1.183040738105774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8627368211746216, "epoch": 2.14, "learning_rate": 3.93068469991547e-05, "loss": 1.2972, "step": 2530, "task_loss": 1.6322598457336426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3751482963562012, "epoch": 2.14, "learning_rate": 3.930262045646661e-05, "loss": 1.4511, "step": 2531, "task_loss": 1.8769856691360474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5390530824661255, "epoch": 2.14, "learning_rate": 3.929839391377853e-05, "loss": 1.1242, "step": 2532, "task_loss": 1.0099105834960938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3190017938613892, "epoch": 2.14, "learning_rate": 3.929416737109045e-05, "loss": 1.1558, "step": 2533, "task_loss": 2.2437427043914795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2159230709075928, "epoch": 2.14, "learning_rate": 3.9289940828402364e-05, "loss": 1.1851, "step": 2534, "task_loss": 1.2412211894989014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4506877660751343, "epoch": 2.14, "learning_rate": 3.928571428571429e-05, "loss": 1.04, "step": 2535, "task_loss": 0.6821317672729492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6746113300323486, "epoch": 2.14, "learning_rate": 3.928148774302621e-05, "loss": 1.0522, "step": 2536, "task_loss": 1.076552391052246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6370927095413208, "epoch": 2.14, "learning_rate": 3.927726120033813e-05, "loss": 1.4058, "step": 2537, "task_loss": 1.557195782661438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0421838760375977, "epoch": 2.15, "learning_rate": 3.927303465765004e-05, "loss": 1.1944, "step": 2538, "task_loss": 0.919719398021698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8270502686500549, "epoch": 2.15, "learning_rate": 3.926880811496196e-05, "loss": 1.304, "step": 2539, "task_loss": 0.5443058013916016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8390494585037231, "epoch": 2.15, "learning_rate": 3.926458157227388e-05, "loss": 1.0278, "step": 2540, "task_loss": 0.420135498046875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.151467204093933, "epoch": 2.15, "learning_rate": 3.92603550295858e-05, "loss": 1.1576, "step": 2541, "task_loss": 1.2587696313858032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9899442195892334, "epoch": 2.15, "learning_rate": 3.925612848689772e-05, "loss": 1.04, "step": 2542, "task_loss": 1.562186360359192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1969377994537354, "epoch": 2.15, "learning_rate": 3.925190194420964e-05, "loss": 1.3074, "step": 2543, "task_loss": 1.1316585540771484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0923492908477783, "epoch": 2.15, "learning_rate": 3.9247675401521554e-05, "loss": 1.1587, "step": 2544, "task_loss": 0.2309933602809906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1078921556472778, "epoch": 2.15, "learning_rate": 3.9243448858833474e-05, "loss": 1.0046, "step": 2545, "task_loss": 1.6790353059768677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0529794692993164, "epoch": 2.15, "learning_rate": 3.9239222316145394e-05, "loss": 1.0305, "step": 2546, "task_loss": 1.0452624559402466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7505550384521484, "epoch": 2.15, "learning_rate": 3.923499577345731e-05, "loss": 1.2823, "step": 2547, "task_loss": 1.2958886623382568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1431958675384521, "epoch": 2.15, "learning_rate": 3.923076923076923e-05, "loss": 1.0215, "step": 2548, "task_loss": 0.5999258756637573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.031099796295166, "epoch": 2.15, "learning_rate": 3.922654268808115e-05, "loss": 1.3228, "step": 2549, "task_loss": 1.6370066404342651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4943218231201172, "epoch": 2.16, "learning_rate": 3.9222316145393066e-05, "loss": 1.2516, "step": 2550, "task_loss": 1.4671874046325684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8828465342521667, "epoch": 2.16, "learning_rate": 3.9218089602704986e-05, "loss": 1.0493, "step": 2551, "task_loss": 0.6104852557182312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9021772146224976, "epoch": 2.16, "learning_rate": 3.921386306001691e-05, "loss": 1.0642, "step": 2552, "task_loss": 1.3527050018310547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.170947551727295, "epoch": 2.16, "learning_rate": 3.920963651732883e-05, "loss": 0.949, "step": 2553, "task_loss": 1.1193286180496216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.063212275505066, "epoch": 2.16, "learning_rate": 3.9205409974640745e-05, "loss": 0.9539, "step": 2554, "task_loss": 0.4321344494819641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1940295696258545, "epoch": 2.16, "learning_rate": 3.9201183431952664e-05, "loss": 1.1096, "step": 2555, "task_loss": 0.8717199563980103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4430387020111084, "epoch": 2.16, "learning_rate": 3.9196956889264584e-05, "loss": 1.293, "step": 2556, "task_loss": 1.941658616065979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.499438762664795, "epoch": 2.16, "learning_rate": 3.9192730346576504e-05, "loss": 1.2446, "step": 2557, "task_loss": 2.039232015609741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.101204752922058, "epoch": 2.16, "learning_rate": 3.9188503803888424e-05, "loss": 0.8893, "step": 2558, "task_loss": 0.7103560566902161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0475131273269653, "epoch": 2.16, "learning_rate": 3.9184277261200343e-05, "loss": 0.9922, "step": 2559, "task_loss": 2.1834683418273926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2570480108261108, "epoch": 2.16, "learning_rate": 3.9180050718512256e-05, "loss": 1.3178, "step": 2560, "task_loss": 0.6201184988021851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.056185007095337, "epoch": 2.16, "learning_rate": 3.9175824175824176e-05, "loss": 0.8466, "step": 2561, "task_loss": 0.4680480360984802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.105933427810669, "epoch": 2.17, "learning_rate": 3.9171597633136096e-05, "loss": 1.348, "step": 2562, "task_loss": 1.060158610343933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2494783401489258, "epoch": 2.17, "learning_rate": 3.9167371090448016e-05, "loss": 1.0974, "step": 2563, "task_loss": 0.6414755582809448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8980710506439209, "epoch": 2.17, "learning_rate": 3.9163144547759935e-05, "loss": 0.8759, "step": 2564, "task_loss": 1.0450607538223267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9811490774154663, "epoch": 2.17, "learning_rate": 3.9158918005071855e-05, "loss": 1.058, "step": 2565, "task_loss": 0.25542783737182617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7080172300338745, "epoch": 2.17, "learning_rate": 3.9154691462383775e-05, "loss": 1.1566, "step": 2566, "task_loss": 1.6732734441757202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.152398705482483, "epoch": 2.17, "learning_rate": 3.915046491969569e-05, "loss": 1.3252, "step": 2567, "task_loss": 1.004660964012146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1715047359466553, "epoch": 2.17, "learning_rate": 3.914623837700761e-05, "loss": 0.8931, "step": 2568, "task_loss": 0.5798588991165161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8858317136764526, "epoch": 2.17, "learning_rate": 3.9142011834319534e-05, "loss": 1.2267, "step": 2569, "task_loss": 0.7226612567901611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0393520593643188, "epoch": 2.17, "learning_rate": 3.913778529163145e-05, "loss": 0.9466, "step": 2570, "task_loss": 0.22246521711349487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9375803470611572, "epoch": 2.17, "learning_rate": 3.913355874894337e-05, "loss": 1.2883, "step": 2571, "task_loss": 0.997008740901947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.101149320602417, "epoch": 2.17, "learning_rate": 3.9129332206255286e-05, "loss": 0.9537, "step": 2572, "task_loss": 1.194193959236145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6067936420440674, "epoch": 2.17, "learning_rate": 3.91251056635672e-05, "loss": 0.8612, "step": 2573, "task_loss": 0.547789454460144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1956615447998047, "epoch": 2.18, "learning_rate": 3.912087912087912e-05, "loss": 1.2692, "step": 2574, "task_loss": 1.2824865579605103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1936125755310059, "epoch": 2.18, "learning_rate": 3.9116652578191046e-05, "loss": 1.0536, "step": 2575, "task_loss": 0.8501572012901306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1733366250991821, "epoch": 2.18, "learning_rate": 3.911242603550296e-05, "loss": 1.263, "step": 2576, "task_loss": 1.5960862636566162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9040282368659973, "epoch": 2.18, "learning_rate": 3.910819949281488e-05, "loss": 1.0244, "step": 2577, "task_loss": 0.7344114184379578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9368060827255249, "epoch": 2.18, "learning_rate": 3.91039729501268e-05, "loss": 0.8905, "step": 2578, "task_loss": 1.2730581760406494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8898172378540039, "epoch": 2.18, "learning_rate": 3.909974640743871e-05, "loss": 1.3308, "step": 2579, "task_loss": 1.2473318576812744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4191974401474, "epoch": 2.18, "learning_rate": 3.909551986475064e-05, "loss": 1.3315, "step": 2580, "task_loss": 0.5722655057907104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0766643285751343, "epoch": 2.18, "learning_rate": 3.909129332206256e-05, "loss": 0.9769, "step": 2581, "task_loss": 0.3471655249595642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.018371343612671, "epoch": 2.18, "learning_rate": 3.908706677937448e-05, "loss": 0.9782, "step": 2582, "task_loss": 0.5412055850028992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2503197193145752, "epoch": 2.18, "learning_rate": 3.908284023668639e-05, "loss": 0.871, "step": 2583, "task_loss": 1.1371228694915771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2818759679794312, "epoch": 2.18, "learning_rate": 3.907861369399831e-05, "loss": 1.0969, "step": 2584, "task_loss": 1.6001414060592651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0746575593948364, "epoch": 2.19, "learning_rate": 3.907438715131023e-05, "loss": 0.8931, "step": 2585, "task_loss": 0.4314166009426117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4091876745224, "epoch": 2.19, "learning_rate": 3.907016060862215e-05, "loss": 1.0904, "step": 2586, "task_loss": 0.9434720873832703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6515042781829834, "epoch": 2.19, "learning_rate": 3.906593406593407e-05, "loss": 0.9138, "step": 2587, "task_loss": 0.8693208694458008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.15598464012146, "epoch": 2.19, "learning_rate": 3.906170752324599e-05, "loss": 1.0352, "step": 2588, "task_loss": 1.2237881422042847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9320387244224548, "epoch": 2.19, "learning_rate": 3.90574809805579e-05, "loss": 0.9555, "step": 2589, "task_loss": 1.5926427841186523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3917443752288818, "epoch": 2.19, "learning_rate": 3.905325443786982e-05, "loss": 1.4074, "step": 2590, "task_loss": 1.4200363159179688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2033021450042725, "epoch": 2.19, "learning_rate": 3.904902789518174e-05, "loss": 0.9249, "step": 2591, "task_loss": 1.2091560363769531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0083342790603638, "epoch": 2.19, "learning_rate": 3.904480135249366e-05, "loss": 0.9561, "step": 2592, "task_loss": 0.9786311984062195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7833329439163208, "epoch": 2.19, "learning_rate": 3.904057480980558e-05, "loss": 0.587, "step": 2593, "task_loss": 0.47942662239074707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6736243963241577, "epoch": 2.19, "learning_rate": 3.90363482671175e-05, "loss": 1.1556, "step": 2594, "task_loss": 1.3562278747558594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0056538581848145, "epoch": 2.19, "learning_rate": 3.903212172442942e-05, "loss": 0.8437, "step": 2595, "task_loss": 1.1796281337738037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5997281074523926, "epoch": 2.19, "learning_rate": 3.902789518174133e-05, "loss": 0.9662, "step": 2596, "task_loss": 1.2729130983352661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7086240649223328, "epoch": 2.2, "learning_rate": 3.902366863905326e-05, "loss": 0.934, "step": 2597, "task_loss": 0.572425365447998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3163812160491943, "epoch": 2.2, "learning_rate": 3.901944209636518e-05, "loss": 1.118, "step": 2598, "task_loss": 0.6805679202079773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2768546342849731, "epoch": 2.2, "learning_rate": 3.901521555367709e-05, "loss": 1.0428, "step": 2599, "task_loss": 1.9813642501831055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8333299160003662, "epoch": 2.2, "learning_rate": 3.901098901098901e-05, "loss": 0.7286, "step": 2600, "task_loss": 0.9594441056251526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6751474142074585, "epoch": 2.2, "learning_rate": 3.900676246830093e-05, "loss": 1.0864, "step": 2601, "task_loss": 0.8831893801689148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2950180768966675, "epoch": 2.2, "learning_rate": 3.900253592561285e-05, "loss": 1.134, "step": 2602, "task_loss": 0.8364415764808655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4203542470932007, "epoch": 2.2, "learning_rate": 3.899830938292477e-05, "loss": 1.1994, "step": 2603, "task_loss": 1.4373823404312134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9169692993164062, "epoch": 2.2, "learning_rate": 3.899408284023669e-05, "loss": 1.087, "step": 2604, "task_loss": 1.1711246967315674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4896659851074219, "epoch": 2.2, "learning_rate": 3.8989856297548604e-05, "loss": 1.0821, "step": 2605, "task_loss": 0.6582728624343872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7764432430267334, "epoch": 2.2, "learning_rate": 3.8985629754860524e-05, "loss": 0.8271, "step": 2606, "task_loss": 0.7289114594459534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2794256210327148, "epoch": 2.2, "learning_rate": 3.8981403212172443e-05, "loss": 1.0386, "step": 2607, "task_loss": 0.867957592010498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4030076265335083, "epoch": 2.2, "learning_rate": 3.897717666948436e-05, "loss": 1.3621, "step": 2608, "task_loss": 1.2202754020690918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9415581226348877, "epoch": 2.21, "learning_rate": 3.897295012679628e-05, "loss": 1.2506, "step": 2609, "task_loss": 1.256840705871582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.242197036743164, "epoch": 2.21, "learning_rate": 3.89687235841082e-05, "loss": 0.8058, "step": 2610, "task_loss": 0.2458876073360443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2898988723754883, "epoch": 2.21, "learning_rate": 3.896449704142012e-05, "loss": 1.1893, "step": 2611, "task_loss": 2.9621353149414062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5680139064788818, "epoch": 2.21, "learning_rate": 3.8960270498732035e-05, "loss": 1.2619, "step": 2612, "task_loss": 1.2702432870864868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1674429178237915, "epoch": 2.21, "learning_rate": 3.8956043956043955e-05, "loss": 1.2152, "step": 2613, "task_loss": 0.5675352811813354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5425143241882324, "epoch": 2.21, "learning_rate": 3.895181741335588e-05, "loss": 1.1851, "step": 2614, "task_loss": 0.25126349925994873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0818455219268799, "epoch": 2.21, "learning_rate": 3.8947590870667795e-05, "loss": 0.9039, "step": 2615, "task_loss": 1.5480843782424927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9032998085021973, "epoch": 2.21, "learning_rate": 3.8943364327979714e-05, "loss": 1.0538, "step": 2616, "task_loss": 1.5938329696655273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2177902460098267, "epoch": 2.21, "learning_rate": 3.8939137785291634e-05, "loss": 1.2493, "step": 2617, "task_loss": 1.9349310398101807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1987149715423584, "epoch": 2.21, "learning_rate": 3.893491124260355e-05, "loss": 1.0721, "step": 2618, "task_loss": 0.4543725550174713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0904655456542969, "epoch": 2.21, "learning_rate": 3.8930684699915474e-05, "loss": 0.8865, "step": 2619, "task_loss": 1.2245304584503174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2970377206802368, "epoch": 2.21, "learning_rate": 3.892645815722739e-05, "loss": 1.1067, "step": 2620, "task_loss": 1.9518994092941284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7272325754165649, "epoch": 2.22, "learning_rate": 3.8922231614539306e-05, "loss": 0.9174, "step": 2621, "task_loss": 0.597977876663208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.22951340675354, "epoch": 2.22, "learning_rate": 3.8918005071851226e-05, "loss": 0.8941, "step": 2622, "task_loss": 1.1773128509521484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3784542679786682, "epoch": 2.22, "learning_rate": 3.8913778529163146e-05, "loss": 1.1644, "step": 2623, "task_loss": 0.5718308687210083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9610483050346375, "epoch": 2.22, "learning_rate": 3.8909551986475065e-05, "loss": 1.2162, "step": 2624, "task_loss": 0.6378340125083923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4220547676086426, "epoch": 2.22, "learning_rate": 3.8905325443786985e-05, "loss": 1.169, "step": 2625, "task_loss": 1.0365341901779175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9056533575057983, "epoch": 2.22, "learning_rate": 3.8901098901098905e-05, "loss": 1.2776, "step": 2626, "task_loss": 1.1235480308532715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0311174392700195, "epoch": 2.22, "learning_rate": 3.8896872358410825e-05, "loss": 0.9669, "step": 2627, "task_loss": 0.672943115234375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9686775803565979, "epoch": 2.22, "learning_rate": 3.889264581572274e-05, "loss": 0.862, "step": 2628, "task_loss": 1.31742525100708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3354463577270508, "epoch": 2.22, "learning_rate": 3.888841927303466e-05, "loss": 0.8191, "step": 2629, "task_loss": 1.3126877546310425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1685891151428223, "epoch": 2.22, "learning_rate": 3.888419273034658e-05, "loss": 0.7217, "step": 2630, "task_loss": 0.5768212676048279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4074264764785767, "epoch": 2.22, "learning_rate": 3.88799661876585e-05, "loss": 1.0111, "step": 2631, "task_loss": 0.9191560745239258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7551029920578003, "epoch": 2.22, "learning_rate": 3.8875739644970417e-05, "loss": 0.8886, "step": 2632, "task_loss": 0.31499430537223816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5872465372085571, "epoch": 2.23, "learning_rate": 3.8871513102282336e-05, "loss": 1.1038, "step": 2633, "task_loss": 0.927814245223999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1226297616958618, "epoch": 2.23, "learning_rate": 3.886728655959425e-05, "loss": 1.1559, "step": 2634, "task_loss": 0.9361493587493896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9992572069168091, "epoch": 2.23, "learning_rate": 3.886306001690617e-05, "loss": 0.9598, "step": 2635, "task_loss": 1.009710669517517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5561962127685547, "epoch": 2.23, "learning_rate": 3.8858833474218096e-05, "loss": 0.9598, "step": 2636, "task_loss": 0.21820677816867828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0053800344467163, "epoch": 2.23, "learning_rate": 3.885460693153001e-05, "loss": 1.123, "step": 2637, "task_loss": 0.43604540824890137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5035459995269775, "epoch": 2.23, "learning_rate": 3.885038038884193e-05, "loss": 0.9778, "step": 2638, "task_loss": 0.06432268768548965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.282610535621643, "epoch": 2.23, "learning_rate": 3.884615384615385e-05, "loss": 1.0077, "step": 2639, "task_loss": 0.8221561312675476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9048745036125183, "epoch": 2.23, "learning_rate": 3.884192730346577e-05, "loss": 1.0226, "step": 2640, "task_loss": 1.1490238904953003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9234331846237183, "epoch": 2.23, "learning_rate": 3.883770076077769e-05, "loss": 1.1303, "step": 2641, "task_loss": 2.1652305126190186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6388466358184814, "epoch": 2.23, "learning_rate": 3.883347421808961e-05, "loss": 0.9597, "step": 2642, "task_loss": 1.337090015411377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8627198934555054, "epoch": 2.23, "learning_rate": 3.882924767540153e-05, "loss": 0.8972, "step": 2643, "task_loss": 1.14003324508667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8966836929321289, "epoch": 2.23, "learning_rate": 3.882502113271344e-05, "loss": 1.1171, "step": 2644, "task_loss": 0.6047772765159607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9539769887924194, "epoch": 2.24, "learning_rate": 3.882079459002536e-05, "loss": 1.0067, "step": 2645, "task_loss": 1.5571770668029785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8053699135780334, "epoch": 2.24, "learning_rate": 3.881656804733728e-05, "loss": 1.0571, "step": 2646, "task_loss": 0.4708690047264099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6779573559761047, "epoch": 2.24, "learning_rate": 3.88123415046492e-05, "loss": 0.9754, "step": 2647, "task_loss": 0.3710586428642273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6937364339828491, "epoch": 2.24, "learning_rate": 3.880811496196112e-05, "loss": 0.9141, "step": 2648, "task_loss": 0.5804961919784546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7161368131637573, "epoch": 2.24, "learning_rate": 3.880388841927304e-05, "loss": 0.9676, "step": 2649, "task_loss": 1.038461446762085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4778826236724854, "epoch": 2.24, "learning_rate": 3.879966187658495e-05, "loss": 1.0377, "step": 2650, "task_loss": 1.319225549697876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9156336784362793, "epoch": 2.24, "learning_rate": 3.879543533389687e-05, "loss": 1.1271, "step": 2651, "task_loss": 0.8085601329803467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8943049311637878, "epoch": 2.24, "learning_rate": 3.879120879120879e-05, "loss": 0.983, "step": 2652, "task_loss": 0.5118027925491333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9428730607032776, "epoch": 2.24, "learning_rate": 3.878698224852072e-05, "loss": 0.8289, "step": 2653, "task_loss": 1.4936695098876953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0826247930526733, "epoch": 2.24, "learning_rate": 3.878275570583263e-05, "loss": 1.1184, "step": 2654, "task_loss": 1.79459547996521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7982751727104187, "epoch": 2.24, "learning_rate": 3.877852916314455e-05, "loss": 0.8941, "step": 2655, "task_loss": 0.6158744692802429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9716556072235107, "epoch": 2.24, "learning_rate": 3.877430262045647e-05, "loss": 1.1512, "step": 2656, "task_loss": 1.2565444707870483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7928158640861511, "epoch": 2.25, "learning_rate": 3.877007607776838e-05, "loss": 1.046, "step": 2657, "task_loss": 1.7453557252883911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9852041006088257, "epoch": 2.25, "learning_rate": 3.876584953508031e-05, "loss": 1.0869, "step": 2658, "task_loss": 2.253538131713867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6725125312805176, "epoch": 2.25, "learning_rate": 3.876162299239223e-05, "loss": 1.195, "step": 2659, "task_loss": 1.124234914779663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9993510246276855, "epoch": 2.25, "learning_rate": 3.875739644970414e-05, "loss": 1.0778, "step": 2660, "task_loss": 0.8013314604759216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.948816180229187, "epoch": 2.25, "learning_rate": 3.875316990701606e-05, "loss": 1.0504, "step": 2661, "task_loss": 0.5792979598045349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8851001262664795, "epoch": 2.25, "learning_rate": 3.874894336432798e-05, "loss": 0.8263, "step": 2662, "task_loss": 1.1115994453430176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.814497709274292, "epoch": 2.25, "learning_rate": 3.87447168216399e-05, "loss": 0.9557, "step": 2663, "task_loss": 1.1476298570632935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6598769426345825, "epoch": 2.25, "learning_rate": 3.874049027895182e-05, "loss": 0.6931, "step": 2664, "task_loss": 0.7893716096878052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9944308996200562, "epoch": 2.25, "learning_rate": 3.873626373626374e-05, "loss": 0.9943, "step": 2665, "task_loss": 1.0406333208084106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.307536005973816, "epoch": 2.25, "learning_rate": 3.8732037193575654e-05, "loss": 0.9512, "step": 2666, "task_loss": 1.6902304887771606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2127697467803955, "epoch": 2.25, "learning_rate": 3.8727810650887574e-05, "loss": 1.1893, "step": 2667, "task_loss": 0.6091403961181641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1961246728897095, "epoch": 2.26, "learning_rate": 3.872358410819949e-05, "loss": 1.1924, "step": 2668, "task_loss": 1.3169676065444946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5935120582580566, "epoch": 2.26, "learning_rate": 3.871935756551141e-05, "loss": 1.0087, "step": 2669, "task_loss": 1.368773102760315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4729667901992798, "epoch": 2.26, "learning_rate": 3.871513102282333e-05, "loss": 0.7875, "step": 2670, "task_loss": 0.21085739135742188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.8610870838165283, "epoch": 2.26, "learning_rate": 3.871090448013525e-05, "loss": 1.3971, "step": 2671, "task_loss": 1.9653570652008057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6469870805740356, "epoch": 2.26, "learning_rate": 3.870667793744717e-05, "loss": 1.0285, "step": 2672, "task_loss": 1.5969148874282837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1149704456329346, "epoch": 2.26, "learning_rate": 3.8702451394759085e-05, "loss": 0.8146, "step": 2673, "task_loss": 0.7556873559951782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1249204874038696, "epoch": 2.26, "learning_rate": 3.8698224852071005e-05, "loss": 0.8217, "step": 2674, "task_loss": 1.1029245853424072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3365437984466553, "epoch": 2.26, "learning_rate": 3.869399830938293e-05, "loss": 0.9237, "step": 2675, "task_loss": 0.46681517362594604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9106525182723999, "epoch": 2.26, "learning_rate": 3.8689771766694844e-05, "loss": 1.2073, "step": 2676, "task_loss": 0.5507681369781494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3034907579421997, "epoch": 2.26, "learning_rate": 3.8685545224006764e-05, "loss": 1.0074, "step": 2677, "task_loss": 0.78775954246521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7447793483734131, "epoch": 2.26, "learning_rate": 3.8681318681318684e-05, "loss": 1.0321, "step": 2678, "task_loss": 0.6275745630264282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4194958209991455, "epoch": 2.26, "learning_rate": 3.86770921386306e-05, "loss": 0.9003, "step": 2679, "task_loss": 0.18546538054943085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.224048137664795, "epoch": 2.27, "learning_rate": 3.867286559594252e-05, "loss": 1.056, "step": 2680, "task_loss": 0.8362459540367126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8359700441360474, "epoch": 2.27, "learning_rate": 3.866863905325444e-05, "loss": 0.8212, "step": 2681, "task_loss": 0.8788122534751892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8226076364517212, "epoch": 2.27, "learning_rate": 3.866441251056636e-05, "loss": 1.0566, "step": 2682, "task_loss": 0.4080098867416382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.709947943687439, "epoch": 2.27, "learning_rate": 3.8660185967878276e-05, "loss": 0.8677, "step": 2683, "task_loss": 0.6433559060096741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.8629521131515503, "epoch": 2.27, "learning_rate": 3.8655959425190196e-05, "loss": 0.9927, "step": 2684, "task_loss": 0.8281159400939941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6922454237937927, "epoch": 2.27, "learning_rate": 3.8651732882502115e-05, "loss": 0.6941, "step": 2685, "task_loss": 1.3576430082321167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8792062997817993, "epoch": 2.27, "learning_rate": 3.8647506339814035e-05, "loss": 0.7034, "step": 2686, "task_loss": 0.6500823497772217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5596905946731567, "epoch": 2.27, "learning_rate": 3.8643279797125955e-05, "loss": 1.2937, "step": 2687, "task_loss": 0.6133275032043457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1048552989959717, "epoch": 2.27, "learning_rate": 3.8639053254437874e-05, "loss": 1.2472, "step": 2688, "task_loss": 0.7475387454032898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8646892309188843, "epoch": 2.27, "learning_rate": 3.863482671174979e-05, "loss": 1.0369, "step": 2689, "task_loss": 0.9499238133430481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.389082908630371, "epoch": 2.27, "learning_rate": 3.863060016906171e-05, "loss": 1.1229, "step": 2690, "task_loss": 1.7066051959991455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.062046766281128, "epoch": 2.27, "learning_rate": 3.862637362637363e-05, "loss": 0.9872, "step": 2691, "task_loss": 1.264611840248108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.625060498714447, "epoch": 2.28, "learning_rate": 3.862214708368555e-05, "loss": 0.7797, "step": 2692, "task_loss": 0.7085673809051514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.76900315284729, "epoch": 2.28, "learning_rate": 3.8617920540997466e-05, "loss": 1.014, "step": 2693, "task_loss": 0.7663934826850891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8141089677810669, "epoch": 2.28, "learning_rate": 3.8613693998309386e-05, "loss": 0.8539, "step": 2694, "task_loss": 1.802156925201416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8502638339996338, "epoch": 2.28, "learning_rate": 3.86094674556213e-05, "loss": 0.892, "step": 2695, "task_loss": 1.2902977466583252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4483528733253479, "epoch": 2.28, "learning_rate": 3.860524091293322e-05, "loss": 0.7045, "step": 2696, "task_loss": 0.539362370967865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8091086149215698, "epoch": 2.28, "learning_rate": 3.8601014370245145e-05, "loss": 0.9149, "step": 2697, "task_loss": 0.44426122307777405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6857091188430786, "epoch": 2.28, "learning_rate": 3.8596787827557065e-05, "loss": 0.9683, "step": 2698, "task_loss": 1.0794121026992798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.332659125328064, "epoch": 2.28, "learning_rate": 3.859256128486898e-05, "loss": 1.1469, "step": 2699, "task_loss": 1.8514037132263184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8159573078155518, "epoch": 2.28, "learning_rate": 3.85883347421809e-05, "loss": 0.9481, "step": 2700, "task_loss": 0.4771418571472168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7059981822967529, "epoch": 2.28, "learning_rate": 3.858410819949282e-05, "loss": 0.7462, "step": 2701, "task_loss": 1.3292932510375977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7080508470535278, "epoch": 2.28, "learning_rate": 3.857988165680473e-05, "loss": 0.7322, "step": 2702, "task_loss": 0.42925596237182617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8194615840911865, "epoch": 2.28, "learning_rate": 3.857565511411666e-05, "loss": 0.7444, "step": 2703, "task_loss": 0.7658073902130127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3528589010238647, "epoch": 2.29, "learning_rate": 3.857142857142858e-05, "loss": 1.0941, "step": 2704, "task_loss": 1.7712552547454834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8303042054176331, "epoch": 2.29, "learning_rate": 3.856720202874049e-05, "loss": 1.0835, "step": 2705, "task_loss": 1.4435240030288696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7866034507751465, "epoch": 2.29, "learning_rate": 3.856297548605241e-05, "loss": 0.8697, "step": 2706, "task_loss": 1.0457793474197388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9597992897033691, "epoch": 2.29, "learning_rate": 3.855874894336433e-05, "loss": 1.1014, "step": 2707, "task_loss": 0.6245673894882202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8721973299980164, "epoch": 2.29, "learning_rate": 3.855452240067625e-05, "loss": 0.7514, "step": 2708, "task_loss": 1.620715856552124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7276350259780884, "epoch": 2.29, "learning_rate": 3.855029585798817e-05, "loss": 0.9331, "step": 2709, "task_loss": 1.8006185293197632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46855589747428894, "epoch": 2.29, "learning_rate": 3.854606931530009e-05, "loss": 0.533, "step": 2710, "task_loss": 0.711432158946991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49448275566101074, "epoch": 2.29, "learning_rate": 3.854184277261201e-05, "loss": 1.0711, "step": 2711, "task_loss": 0.33237916231155396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9341626167297363, "epoch": 2.29, "learning_rate": 3.853761622992392e-05, "loss": 0.9384, "step": 2712, "task_loss": 0.8614869117736816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.186208724975586, "epoch": 2.29, "learning_rate": 3.853338968723584e-05, "loss": 0.8829, "step": 2713, "task_loss": 0.6522613167762756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0377975702285767, "epoch": 2.29, "learning_rate": 3.852916314454777e-05, "loss": 0.9292, "step": 2714, "task_loss": 0.5374699234962463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9838881492614746, "epoch": 2.29, "learning_rate": 3.852493660185968e-05, "loss": 0.9744, "step": 2715, "task_loss": 1.5493029356002808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4743983745574951, "epoch": 2.3, "learning_rate": 3.85207100591716e-05, "loss": 1.1343, "step": 2716, "task_loss": 2.1171298027038574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7049221992492676, "epoch": 2.3, "learning_rate": 3.851648351648352e-05, "loss": 0.7853, "step": 2717, "task_loss": 0.9822770953178406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7248736619949341, "epoch": 2.3, "learning_rate": 3.851225697379543e-05, "loss": 0.756, "step": 2718, "task_loss": 1.1020175218582153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2542095184326172, "epoch": 2.3, "learning_rate": 3.850803043110735e-05, "loss": 1.1003, "step": 2719, "task_loss": 1.3991535902023315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6531649231910706, "epoch": 2.3, "learning_rate": 3.850380388841928e-05, "loss": 0.9238, "step": 2720, "task_loss": 1.1276453733444214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7287998199462891, "epoch": 2.3, "learning_rate": 3.849957734573119e-05, "loss": 0.7562, "step": 2721, "task_loss": 0.3502250015735626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1081236600875854, "epoch": 2.3, "learning_rate": 3.849535080304311e-05, "loss": 0.7874, "step": 2722, "task_loss": 0.49908962845802307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6261146068572998, "epoch": 2.3, "learning_rate": 3.849112426035503e-05, "loss": 0.8146, "step": 2723, "task_loss": 0.728874146938324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9258499145507812, "epoch": 2.3, "learning_rate": 3.8486897717666944e-05, "loss": 0.885, "step": 2724, "task_loss": 0.6445170044898987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0069427490234375, "epoch": 2.3, "learning_rate": 3.848267117497887e-05, "loss": 1.0715, "step": 2725, "task_loss": 0.8258471488952637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7842346429824829, "epoch": 2.3, "learning_rate": 3.847844463229079e-05, "loss": 1.0863, "step": 2726, "task_loss": 0.7736535668373108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7150890827178955, "epoch": 2.3, "learning_rate": 3.847421808960271e-05, "loss": 0.901, "step": 2727, "task_loss": 0.699732780456543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8901740312576294, "epoch": 2.31, "learning_rate": 3.846999154691462e-05, "loss": 1.0364, "step": 2728, "task_loss": 0.9829272031784058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0776212215423584, "epoch": 2.31, "learning_rate": 3.846576500422654e-05, "loss": 1.0861, "step": 2729, "task_loss": 2.727968692779541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2469274997711182, "epoch": 2.31, "learning_rate": 3.846153846153846e-05, "loss": 0.9628, "step": 2730, "task_loss": 0.5393826961517334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4563027322292328, "epoch": 2.31, "learning_rate": 3.845731191885038e-05, "loss": 0.7541, "step": 2731, "task_loss": 0.3857778310775757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0147790908813477, "epoch": 2.31, "learning_rate": 3.84530853761623e-05, "loss": 1.0398, "step": 2732, "task_loss": 1.0130153894424438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9169516563415527, "epoch": 2.31, "learning_rate": 3.844885883347422e-05, "loss": 0.9369, "step": 2733, "task_loss": 1.0595557689666748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5466210842132568, "epoch": 2.31, "learning_rate": 3.8444632290786135e-05, "loss": 0.6902, "step": 2734, "task_loss": 0.8124677538871765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0851749181747437, "epoch": 2.31, "learning_rate": 3.8440405748098055e-05, "loss": 1.0223, "step": 2735, "task_loss": 0.4945577383041382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9139431715011597, "epoch": 2.31, "learning_rate": 3.8436179205409975e-05, "loss": 0.8949, "step": 2736, "task_loss": 0.329665869474411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6739212274551392, "epoch": 2.31, "learning_rate": 3.8431952662721894e-05, "loss": 0.786, "step": 2737, "task_loss": 1.281335473060608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4547343254089355, "epoch": 2.31, "learning_rate": 3.8427726120033814e-05, "loss": 1.0999, "step": 2738, "task_loss": 1.8744821548461914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6891152858734131, "epoch": 2.32, "learning_rate": 3.8423499577345734e-05, "loss": 0.7368, "step": 2739, "task_loss": 0.7544235587120056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8011736869812012, "epoch": 2.32, "learning_rate": 3.8419273034657653e-05, "loss": 0.9129, "step": 2740, "task_loss": 1.2579606771469116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9575057625770569, "epoch": 2.32, "learning_rate": 3.8415046491969566e-05, "loss": 1.1874, "step": 2741, "task_loss": 0.6582731604576111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8653138875961304, "epoch": 2.32, "learning_rate": 3.841081994928149e-05, "loss": 0.9631, "step": 2742, "task_loss": 0.9822165966033936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7020207047462463, "epoch": 2.32, "learning_rate": 3.840659340659341e-05, "loss": 0.6931, "step": 2743, "task_loss": 2.2094030380249023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1200835704803467, "epoch": 2.32, "learning_rate": 3.8402366863905326e-05, "loss": 0.8202, "step": 2744, "task_loss": 0.502842128276825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3473713397979736, "epoch": 2.32, "learning_rate": 3.8398140321217245e-05, "loss": 0.8812, "step": 2745, "task_loss": 0.5459030866622925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9201637506484985, "epoch": 2.32, "learning_rate": 3.8393913778529165e-05, "loss": 0.8083, "step": 2746, "task_loss": 0.5270050764083862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7367267608642578, "epoch": 2.32, "learning_rate": 3.8389687235841085e-05, "loss": 0.8392, "step": 2747, "task_loss": 1.0417191982269287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.724015474319458, "epoch": 2.32, "learning_rate": 3.8385460693153005e-05, "loss": 0.9245, "step": 2748, "task_loss": 0.8689031004905701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9509819746017456, "epoch": 2.32, "learning_rate": 3.8381234150464924e-05, "loss": 0.8218, "step": 2749, "task_loss": 1.7260948419570923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6981604695320129, "epoch": 2.32, "learning_rate": 3.837700760777684e-05, "loss": 0.8546, "step": 2750, "task_loss": 0.4585670232772827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8247796297073364, "epoch": 2.33, "learning_rate": 3.837278106508876e-05, "loss": 1.0048, "step": 2751, "task_loss": 1.0435298681259155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9197900891304016, "epoch": 2.33, "learning_rate": 3.836855452240068e-05, "loss": 1.1241, "step": 2752, "task_loss": 0.8211560249328613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4762333631515503, "epoch": 2.33, "learning_rate": 3.8364327979712597e-05, "loss": 0.6653, "step": 2753, "task_loss": 0.5741895437240601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.156245231628418, "epoch": 2.33, "learning_rate": 3.8360101437024516e-05, "loss": 1.103, "step": 2754, "task_loss": 1.2082412242889404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3442273139953613, "epoch": 2.33, "learning_rate": 3.8355874894336436e-05, "loss": 1.1644, "step": 2755, "task_loss": 0.9181085228919983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8421118259429932, "epoch": 2.33, "learning_rate": 3.8351648351648356e-05, "loss": 0.8537, "step": 2756, "task_loss": 0.7524453997612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0632362365722656, "epoch": 2.33, "learning_rate": 3.834742180896027e-05, "loss": 1.0169, "step": 2757, "task_loss": 1.1791553497314453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8661927580833435, "epoch": 2.33, "learning_rate": 3.834319526627219e-05, "loss": 0.8457, "step": 2758, "task_loss": 0.5272379517555237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7913994789123535, "epoch": 2.33, "learning_rate": 3.8338968723584115e-05, "loss": 1.1334, "step": 2759, "task_loss": 2.080298900604248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7201851010322571, "epoch": 2.33, "learning_rate": 3.833474218089603e-05, "loss": 0.8741, "step": 2760, "task_loss": 0.9891384243965149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7822756767272949, "epoch": 2.33, "learning_rate": 3.833051563820795e-05, "loss": 0.8047, "step": 2761, "task_loss": 0.6375095248222351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.438657283782959, "epoch": 2.33, "learning_rate": 3.832628909551987e-05, "loss": 1.015, "step": 2762, "task_loss": 1.7634660005569458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1297513246536255, "epoch": 2.34, "learning_rate": 3.832206255283178e-05, "loss": 0.8486, "step": 2763, "task_loss": 1.720613718032837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0095951557159424, "epoch": 2.34, "learning_rate": 3.831783601014371e-05, "loss": 0.7871, "step": 2764, "task_loss": 1.1953233480453491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0161008834838867, "epoch": 2.34, "learning_rate": 3.8313609467455627e-05, "loss": 0.7758, "step": 2765, "task_loss": 0.6029589176177979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6541587114334106, "epoch": 2.34, "learning_rate": 3.830938292476754e-05, "loss": 0.7323, "step": 2766, "task_loss": 1.05960214138031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5586663484573364, "epoch": 2.34, "learning_rate": 3.830515638207946e-05, "loss": 0.9495, "step": 2767, "task_loss": 0.4589826166629791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9883490800857544, "epoch": 2.34, "learning_rate": 3.830092983939138e-05, "loss": 1.034, "step": 2768, "task_loss": 0.4439418613910675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7511551380157471, "epoch": 2.34, "learning_rate": 3.82967032967033e-05, "loss": 0.7189, "step": 2769, "task_loss": 0.3807438015937805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1827082633972168, "epoch": 2.34, "learning_rate": 3.829247675401522e-05, "loss": 0.8532, "step": 2770, "task_loss": 1.0165820121765137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3562498092651367, "epoch": 2.34, "learning_rate": 3.828825021132714e-05, "loss": 1.0029, "step": 2771, "task_loss": 0.637798547744751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8920499086380005, "epoch": 2.34, "learning_rate": 3.828402366863906e-05, "loss": 0.9119, "step": 2772, "task_loss": 1.7400342226028442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4420353174209595, "epoch": 2.34, "learning_rate": 3.827979712595097e-05, "loss": 0.9025, "step": 2773, "task_loss": 0.5119230151176453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8004299402236938, "epoch": 2.34, "learning_rate": 3.827557058326289e-05, "loss": 0.7786, "step": 2774, "task_loss": 0.563220739364624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0306073427200317, "epoch": 2.35, "learning_rate": 3.827134404057481e-05, "loss": 0.9229, "step": 2775, "task_loss": 0.6508654356002808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7608354091644287, "epoch": 2.35, "learning_rate": 3.826711749788673e-05, "loss": 0.8958, "step": 2776, "task_loss": 0.968728244304657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7283639311790466, "epoch": 2.35, "learning_rate": 3.826289095519865e-05, "loss": 0.734, "step": 2777, "task_loss": 0.6921197772026062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5912861824035645, "epoch": 2.35, "learning_rate": 3.825866441251057e-05, "loss": 0.7005, "step": 2778, "task_loss": 0.24096111953258514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6926796436309814, "epoch": 2.35, "learning_rate": 3.825443786982248e-05, "loss": 0.9106, "step": 2779, "task_loss": 1.185141921043396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0011357069015503, "epoch": 2.35, "learning_rate": 3.82502113271344e-05, "loss": 1.1086, "step": 2780, "task_loss": 0.8280209898948669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8165407180786133, "epoch": 2.35, "learning_rate": 3.824598478444633e-05, "loss": 0.9512, "step": 2781, "task_loss": 1.393274188041687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2185384035110474, "epoch": 2.35, "learning_rate": 3.824175824175824e-05, "loss": 0.9892, "step": 2782, "task_loss": 1.0361679792404175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8662289977073669, "epoch": 2.35, "learning_rate": 3.823753169907016e-05, "loss": 0.9756, "step": 2783, "task_loss": 1.1324551105499268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7349108457565308, "epoch": 2.35, "learning_rate": 3.823330515638208e-05, "loss": 0.5927, "step": 2784, "task_loss": 0.7382093071937561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0079318284988403, "epoch": 2.35, "learning_rate": 3.8229078613694e-05, "loss": 1.2044, "step": 2785, "task_loss": 0.6155231595039368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0645049810409546, "epoch": 2.35, "learning_rate": 3.822485207100592e-05, "loss": 1.2562, "step": 2786, "task_loss": 1.8873682022094727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.124566674232483, "epoch": 2.36, "learning_rate": 3.822062552831784e-05, "loss": 1.1087, "step": 2787, "task_loss": 1.3697583675384521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1162587404251099, "epoch": 2.36, "learning_rate": 3.821639898562976e-05, "loss": 0.9679, "step": 2788, "task_loss": 1.2720139026641846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8952822685241699, "epoch": 2.36, "learning_rate": 3.821217244294167e-05, "loss": 1.0339, "step": 2789, "task_loss": 0.8598231673240662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.898161768913269, "epoch": 2.36, "learning_rate": 3.820794590025359e-05, "loss": 1.0083, "step": 2790, "task_loss": 0.5518762469291687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8672978281974792, "epoch": 2.36, "learning_rate": 3.820371935756551e-05, "loss": 0.8177, "step": 2791, "task_loss": 0.7514976263046265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1426260471343994, "epoch": 2.36, "learning_rate": 3.819949281487743e-05, "loss": 1.025, "step": 2792, "task_loss": 0.9490423798561096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2920037508010864, "epoch": 2.36, "learning_rate": 3.819526627218935e-05, "loss": 0.8818, "step": 2793, "task_loss": 1.3418645858764648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.809194803237915, "epoch": 2.36, "learning_rate": 3.819103972950127e-05, "loss": 1.1807, "step": 2794, "task_loss": 0.8744732141494751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4683275818824768, "epoch": 2.36, "learning_rate": 3.8186813186813185e-05, "loss": 0.9883, "step": 2795, "task_loss": 0.46207037568092346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.8712544441223145, "epoch": 2.36, "learning_rate": 3.8182586644125105e-05, "loss": 1.1729, "step": 2796, "task_loss": 1.0861806869506836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8199642896652222, "epoch": 2.36, "learning_rate": 3.8178360101437024e-05, "loss": 0.8701, "step": 2797, "task_loss": 0.8214704990386963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7675526738166809, "epoch": 2.36, "learning_rate": 3.817413355874895e-05, "loss": 0.7958, "step": 2798, "task_loss": 0.3009050488471985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9251284599304199, "epoch": 2.37, "learning_rate": 3.8169907016060864e-05, "loss": 0.9854, "step": 2799, "task_loss": 1.2297422885894775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3322408199310303, "epoch": 2.37, "learning_rate": 3.8165680473372784e-05, "loss": 1.1804, "step": 2800, "task_loss": 0.9608936905860901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8078939914703369, "epoch": 2.37, "learning_rate": 3.81614539306847e-05, "loss": 0.8534, "step": 2801, "task_loss": 0.6127941012382507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4568567276000977, "epoch": 2.37, "learning_rate": 3.8157227387996616e-05, "loss": 0.9776, "step": 2802, "task_loss": 1.3818163871765137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7733312249183655, "epoch": 2.37, "learning_rate": 3.815300084530854e-05, "loss": 0.8674, "step": 2803, "task_loss": 0.9998605251312256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9046659469604492, "epoch": 2.37, "learning_rate": 3.814877430262046e-05, "loss": 0.9599, "step": 2804, "task_loss": 0.7956311702728271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0796973705291748, "epoch": 2.37, "learning_rate": 3.8144547759932375e-05, "loss": 0.9899, "step": 2805, "task_loss": 1.750176191329956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.581706166267395, "epoch": 2.37, "learning_rate": 3.8140321217244295e-05, "loss": 0.9691, "step": 2806, "task_loss": 0.6713460683822632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9355568289756775, "epoch": 2.37, "learning_rate": 3.8136094674556215e-05, "loss": 0.983, "step": 2807, "task_loss": 0.8749027848243713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1323107481002808, "epoch": 2.37, "learning_rate": 3.8131868131868135e-05, "loss": 0.8204, "step": 2808, "task_loss": 1.021223783493042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9036589860916138, "epoch": 2.37, "learning_rate": 3.8127641589180054e-05, "loss": 0.9869, "step": 2809, "task_loss": 0.6791079640388489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9798613786697388, "epoch": 2.38, "learning_rate": 3.8123415046491974e-05, "loss": 0.9178, "step": 2810, "task_loss": 1.2401801347732544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9742584824562073, "epoch": 2.38, "learning_rate": 3.811918850380389e-05, "loss": 1.1287, "step": 2811, "task_loss": 0.8988955616950989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2078886032104492, "epoch": 2.38, "learning_rate": 3.811496196111581e-05, "loss": 0.9508, "step": 2812, "task_loss": 0.9693688154220581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.503617525100708, "epoch": 2.38, "learning_rate": 3.811073541842773e-05, "loss": 0.9074, "step": 2813, "task_loss": 1.4440945386886597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5045994520187378, "epoch": 2.38, "learning_rate": 3.8106508875739646e-05, "loss": 0.8363, "step": 2814, "task_loss": 0.3252667188644409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7501572370529175, "epoch": 2.38, "learning_rate": 3.8102282333051566e-05, "loss": 0.9259, "step": 2815, "task_loss": 0.5465265512466431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5216963291168213, "epoch": 2.38, "learning_rate": 3.8098055790363486e-05, "loss": 1.1598, "step": 2816, "task_loss": 1.1024634838104248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1000169515609741, "epoch": 2.38, "learning_rate": 3.8093829247675406e-05, "loss": 0.7246, "step": 2817, "task_loss": 0.9394451379776001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5417129993438721, "epoch": 2.38, "learning_rate": 3.808960270498732e-05, "loss": 0.7452, "step": 2818, "task_loss": 0.20770999789237976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5670961737632751, "epoch": 2.38, "learning_rate": 3.808537616229924e-05, "loss": 0.7114, "step": 2819, "task_loss": 0.7518359422683716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8874340057373047, "epoch": 2.38, "learning_rate": 3.8081149619611165e-05, "loss": 0.7689, "step": 2820, "task_loss": 1.8740922212600708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8931160569190979, "epoch": 2.38, "learning_rate": 3.807692307692308e-05, "loss": 0.8219, "step": 2821, "task_loss": 0.38368308544158936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.794457733631134, "epoch": 2.39, "learning_rate": 3.8072696534235e-05, "loss": 0.9938, "step": 2822, "task_loss": 0.8908917903900146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8437801599502563, "epoch": 2.39, "learning_rate": 3.806846999154692e-05, "loss": 1.1097, "step": 2823, "task_loss": 1.163989543914795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8442609310150146, "epoch": 2.39, "learning_rate": 3.806424344885883e-05, "loss": 0.7882, "step": 2824, "task_loss": 0.5577024817466736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7913405895233154, "epoch": 2.39, "learning_rate": 3.806001690617076e-05, "loss": 1.2467, "step": 2825, "task_loss": 0.7866659164428711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.442245602607727, "epoch": 2.39, "learning_rate": 3.8055790363482676e-05, "loss": 0.8563, "step": 2826, "task_loss": 1.3980008363723755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2754932641983032, "epoch": 2.39, "learning_rate": 3.8051563820794596e-05, "loss": 0.9085, "step": 2827, "task_loss": 0.8194046020507812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6932305097579956, "epoch": 2.39, "learning_rate": 3.804733727810651e-05, "loss": 0.7479, "step": 2828, "task_loss": 0.5999937653541565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.947229266166687, "epoch": 2.39, "learning_rate": 3.804311073541843e-05, "loss": 0.9813, "step": 2829, "task_loss": 1.659379243850708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2015345096588135, "epoch": 2.39, "learning_rate": 3.803888419273035e-05, "loss": 1.0021, "step": 2830, "task_loss": 0.7031340003013611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0736157894134521, "epoch": 2.39, "learning_rate": 3.803465765004227e-05, "loss": 0.8181, "step": 2831, "task_loss": 0.5764914155006409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9041352272033691, "epoch": 2.39, "learning_rate": 3.803043110735419e-05, "loss": 0.9886, "step": 2832, "task_loss": 0.5194782018661499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1091835498809814, "epoch": 2.39, "learning_rate": 3.802620456466611e-05, "loss": 0.7039, "step": 2833, "task_loss": 1.4340245723724365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7108336687088013, "epoch": 2.4, "learning_rate": 3.802197802197802e-05, "loss": 0.7947, "step": 2834, "task_loss": 0.9190305471420288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.643272876739502, "epoch": 2.4, "learning_rate": 3.801775147928994e-05, "loss": 0.8723, "step": 2835, "task_loss": 0.7608177661895752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1798882484436035, "epoch": 2.4, "learning_rate": 3.801352493660186e-05, "loss": 0.8782, "step": 2836, "task_loss": 1.4496816396713257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8791857361793518, "epoch": 2.4, "learning_rate": 3.800929839391378e-05, "loss": 0.816, "step": 2837, "task_loss": 0.7793219685554504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.612616777420044, "epoch": 2.4, "learning_rate": 3.80050718512257e-05, "loss": 1.0627, "step": 2838, "task_loss": 0.5208151936531067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6477140784263611, "epoch": 2.4, "learning_rate": 3.800084530853762e-05, "loss": 0.7009, "step": 2839, "task_loss": 0.7624218463897705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9690272808074951, "epoch": 2.4, "learning_rate": 3.799661876584953e-05, "loss": 1.0545, "step": 2840, "task_loss": 1.3269321918487549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6555296182632446, "epoch": 2.4, "learning_rate": 3.799239222316145e-05, "loss": 0.8966, "step": 2841, "task_loss": 1.1732136011123657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1967496871948242, "epoch": 2.4, "learning_rate": 3.798816568047338e-05, "loss": 1.0562, "step": 2842, "task_loss": 0.7597159147262573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6872501373291016, "epoch": 2.4, "learning_rate": 3.79839391377853e-05, "loss": 0.9537, "step": 2843, "task_loss": 0.5742092728614807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.521113932132721, "epoch": 2.4, "learning_rate": 3.797971259509721e-05, "loss": 0.8332, "step": 2844, "task_loss": 0.5091487169265747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8770514726638794, "epoch": 2.4, "learning_rate": 3.797548605240913e-05, "loss": 0.8272, "step": 2845, "task_loss": 0.4415215253829956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37679287791252136, "epoch": 2.41, "learning_rate": 3.797125950972105e-05, "loss": 0.8942, "step": 2846, "task_loss": 0.3382928669452667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6787087917327881, "epoch": 2.41, "learning_rate": 3.7967032967032964e-05, "loss": 0.8057, "step": 2847, "task_loss": 0.8066645264625549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4253851175308228, "epoch": 2.41, "learning_rate": 3.796280642434489e-05, "loss": 1.0801, "step": 2848, "task_loss": 0.9736409187316895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1561572551727295, "epoch": 2.41, "learning_rate": 3.795857988165681e-05, "loss": 1.1471, "step": 2849, "task_loss": 0.3738325834274292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8552138805389404, "epoch": 2.41, "learning_rate": 3.795435333896872e-05, "loss": 0.8915, "step": 2850, "task_loss": 0.5768214464187622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6983251571655273, "epoch": 2.41, "learning_rate": 3.795012679628064e-05, "loss": 0.6199, "step": 2851, "task_loss": 0.4596404731273651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7657592296600342, "epoch": 2.41, "learning_rate": 3.794590025359256e-05, "loss": 0.8677, "step": 2852, "task_loss": 1.4275768995285034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8488539457321167, "epoch": 2.41, "learning_rate": 3.794167371090448e-05, "loss": 0.8849, "step": 2853, "task_loss": 0.08751793950796127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8718726634979248, "epoch": 2.41, "learning_rate": 3.79374471682164e-05, "loss": 0.8725, "step": 2854, "task_loss": 0.6644324660301208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0168962478637695, "epoch": 2.41, "learning_rate": 3.793322062552832e-05, "loss": 0.8824, "step": 2855, "task_loss": 1.6163610219955444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5451105237007141, "epoch": 2.41, "learning_rate": 3.792899408284024e-05, "loss": 0.8378, "step": 2856, "task_loss": 1.109480857849121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6393899917602539, "epoch": 2.41, "learning_rate": 3.7924767540152154e-05, "loss": 0.7904, "step": 2857, "task_loss": 1.615539789199829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9568266868591309, "epoch": 2.42, "learning_rate": 3.7920540997464074e-05, "loss": 0.8148, "step": 2858, "task_loss": 1.2032114267349243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9986792802810669, "epoch": 2.42, "learning_rate": 3.7916314454776e-05, "loss": 0.8691, "step": 2859, "task_loss": 0.6073633432388306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46091777086257935, "epoch": 2.42, "learning_rate": 3.7912087912087914e-05, "loss": 0.7717, "step": 2860, "task_loss": 0.6288771033287048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7875893115997314, "epoch": 2.42, "learning_rate": 3.7907861369399833e-05, "loss": 0.7434, "step": 2861, "task_loss": 1.2561161518096924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7107213139533997, "epoch": 2.42, "learning_rate": 3.790363482671175e-05, "loss": 0.8734, "step": 2862, "task_loss": 0.5317698121070862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7418437004089355, "epoch": 2.42, "learning_rate": 3.7899408284023666e-05, "loss": 1.0113, "step": 2863, "task_loss": 1.2537049055099487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.535418689250946, "epoch": 2.42, "learning_rate": 3.7895181741335586e-05, "loss": 0.531, "step": 2864, "task_loss": 0.5316053032875061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7312465906143188, "epoch": 2.42, "learning_rate": 3.789095519864751e-05, "loss": 0.8328, "step": 2865, "task_loss": 0.1997838169336319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3078303337097168, "epoch": 2.42, "learning_rate": 3.7886728655959425e-05, "loss": 1.1117, "step": 2866, "task_loss": 1.3781254291534424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47803449630737305, "epoch": 2.42, "learning_rate": 3.7882502113271345e-05, "loss": 0.8111, "step": 2867, "task_loss": 0.13380055129528046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8989896774291992, "epoch": 2.42, "learning_rate": 3.7878275570583265e-05, "loss": 1.049, "step": 2868, "task_loss": 0.9677106142044067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5937565565109253, "epoch": 2.42, "learning_rate": 3.787404902789518e-05, "loss": 0.9118, "step": 2869, "task_loss": 0.35147061944007874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5706838369369507, "epoch": 2.43, "learning_rate": 3.7869822485207104e-05, "loss": 1.0731, "step": 2870, "task_loss": 0.7251583337783813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9305149912834167, "epoch": 2.43, "learning_rate": 3.7865595942519024e-05, "loss": 0.9065, "step": 2871, "task_loss": 1.2101788520812988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7045131921768188, "epoch": 2.43, "learning_rate": 3.7861369399830944e-05, "loss": 1.0504, "step": 2872, "task_loss": 1.1862872838974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44650229811668396, "epoch": 2.43, "learning_rate": 3.785714285714286e-05, "loss": 0.5797, "step": 2873, "task_loss": 0.6658046245574951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4747769832611084, "epoch": 2.43, "learning_rate": 3.7852916314454776e-05, "loss": 0.8405, "step": 2874, "task_loss": 0.29629307985305786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0143952369689941, "epoch": 2.43, "learning_rate": 3.7848689771766696e-05, "loss": 1.1113, "step": 2875, "task_loss": 2.073786735534668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6739494204521179, "epoch": 2.43, "learning_rate": 3.7844463229078616e-05, "loss": 0.9615, "step": 2876, "task_loss": 0.9683629870414734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.92393958568573, "epoch": 2.43, "learning_rate": 3.7840236686390536e-05, "loss": 0.8059, "step": 2877, "task_loss": 0.9110194444656372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9442762136459351, "epoch": 2.43, "learning_rate": 3.7836010143702455e-05, "loss": 1.1131, "step": 2878, "task_loss": 1.1371574401855469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0322346687316895, "epoch": 2.43, "learning_rate": 3.783178360101437e-05, "loss": 0.8629, "step": 2879, "task_loss": 0.4763219356536865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.060889720916748, "epoch": 2.43, "learning_rate": 3.782755705832629e-05, "loss": 0.8096, "step": 2880, "task_loss": 0.9987779259681702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4273499250411987, "epoch": 2.44, "learning_rate": 3.782333051563821e-05, "loss": 1.0835, "step": 2881, "task_loss": 0.9675452709197998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.128800392150879, "epoch": 2.44, "learning_rate": 3.781910397295013e-05, "loss": 0.9938, "step": 2882, "task_loss": 0.7573325037956238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6825935244560242, "epoch": 2.44, "learning_rate": 3.781487743026205e-05, "loss": 0.6113, "step": 2883, "task_loss": 0.5623762607574463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5015822649002075, "epoch": 2.44, "learning_rate": 3.781065088757397e-05, "loss": 0.7364, "step": 2884, "task_loss": 0.18396219611167908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1744149923324585, "epoch": 2.44, "learning_rate": 3.780642434488589e-05, "loss": 0.869, "step": 2885, "task_loss": 1.084243655204773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7606294751167297, "epoch": 2.44, "learning_rate": 3.78021978021978e-05, "loss": 1.1993, "step": 2886, "task_loss": 0.7473613023757935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3170629143714905, "epoch": 2.44, "learning_rate": 3.7797971259509726e-05, "loss": 0.7858, "step": 2887, "task_loss": 0.508811891078949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4943113327026367, "epoch": 2.44, "learning_rate": 3.7793744716821646e-05, "loss": 0.7492, "step": 2888, "task_loss": 0.7180906534194946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7753883600234985, "epoch": 2.44, "learning_rate": 3.778951817413356e-05, "loss": 0.9868, "step": 2889, "task_loss": 0.4151588976383209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7795724272727966, "epoch": 2.44, "learning_rate": 3.778529163144548e-05, "loss": 0.8007, "step": 2890, "task_loss": 0.28128159046173096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6224783658981323, "epoch": 2.44, "learning_rate": 3.77810650887574e-05, "loss": 0.5659, "step": 2891, "task_loss": 0.167165145277977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7167070508003235, "epoch": 2.44, "learning_rate": 3.777683854606932e-05, "loss": 1.0862, "step": 2892, "task_loss": 0.43149879574775696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5128650665283203, "epoch": 2.45, "learning_rate": 3.777261200338124e-05, "loss": 0.6924, "step": 2893, "task_loss": 0.4081245958805084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5909888744354248, "epoch": 2.45, "learning_rate": 3.776838546069316e-05, "loss": 0.5409, "step": 2894, "task_loss": 0.878649115562439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.476996898651123, "epoch": 2.45, "learning_rate": 3.776415891800507e-05, "loss": 0.9689, "step": 2895, "task_loss": 0.6604695320129395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5343919396400452, "epoch": 2.45, "learning_rate": 3.775993237531699e-05, "loss": 0.7259, "step": 2896, "task_loss": 0.04899701476097107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4607908725738525, "epoch": 2.45, "learning_rate": 3.775570583262891e-05, "loss": 1.0346, "step": 2897, "task_loss": 0.9924553632736206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7702344655990601, "epoch": 2.45, "learning_rate": 3.775147928994083e-05, "loss": 0.9853, "step": 2898, "task_loss": 0.5657379031181335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9297490119934082, "epoch": 2.45, "learning_rate": 3.774725274725275e-05, "loss": 0.7764, "step": 2899, "task_loss": 0.8503624796867371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6266500949859619, "epoch": 2.45, "learning_rate": 3.774302620456467e-05, "loss": 0.9686, "step": 2900, "task_loss": 0.5785765051841736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.264108419418335, "epoch": 2.45, "learning_rate": 3.773879966187659e-05, "loss": 0.9798, "step": 2901, "task_loss": 1.1112068891525269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6151860356330872, "epoch": 2.45, "learning_rate": 3.77345731191885e-05, "loss": 0.7393, "step": 2902, "task_loss": 0.40300264954566956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8651176691055298, "epoch": 2.45, "learning_rate": 3.773034657650042e-05, "loss": 0.7074, "step": 2903, "task_loss": 0.4849865436553955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7949943542480469, "epoch": 2.45, "learning_rate": 3.772612003381235e-05, "loss": 0.9134, "step": 2904, "task_loss": 0.6825757622718811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.101493239402771, "epoch": 2.46, "learning_rate": 3.772189349112426e-05, "loss": 0.8243, "step": 2905, "task_loss": 1.1920785903930664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2680885791778564, "epoch": 2.46, "learning_rate": 3.771766694843618e-05, "loss": 1.0018, "step": 2906, "task_loss": 1.1242799758911133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7173442244529724, "epoch": 2.46, "learning_rate": 3.77134404057481e-05, "loss": 0.8033, "step": 2907, "task_loss": 1.3211387395858765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6088575124740601, "epoch": 2.46, "learning_rate": 3.7709213863060014e-05, "loss": 0.6901, "step": 2908, "task_loss": 0.5301983952522278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9267017841339111, "epoch": 2.46, "learning_rate": 3.770498732037194e-05, "loss": 1.0039, "step": 2909, "task_loss": 1.302922248840332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8784389495849609, "epoch": 2.46, "learning_rate": 3.770076077768386e-05, "loss": 0.9265, "step": 2910, "task_loss": 1.1465840339660645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5505959987640381, "epoch": 2.46, "learning_rate": 3.769653423499577e-05, "loss": 0.7087, "step": 2911, "task_loss": 0.3177320957183838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8098050355911255, "epoch": 2.46, "learning_rate": 3.769230769230769e-05, "loss": 0.85, "step": 2912, "task_loss": 0.668305516242981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3092848062515259, "epoch": 2.46, "learning_rate": 3.768808114961961e-05, "loss": 0.8966, "step": 2913, "task_loss": 0.5536526441574097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6809405088424683, "epoch": 2.46, "learning_rate": 3.768385460693153e-05, "loss": 0.8047, "step": 2914, "task_loss": 0.7208324074745178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.644074022769928, "epoch": 2.46, "learning_rate": 3.767962806424345e-05, "loss": 0.7074, "step": 2915, "task_loss": 0.5879451632499695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9256362915039062, "epoch": 2.46, "learning_rate": 3.767540152155537e-05, "loss": 0.9838, "step": 2916, "task_loss": 1.6519067287445068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5502112507820129, "epoch": 2.47, "learning_rate": 3.767117497886729e-05, "loss": 0.8988, "step": 2917, "task_loss": 0.35601359605789185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5163922905921936, "epoch": 2.47, "learning_rate": 3.7666948436179204e-05, "loss": 0.7384, "step": 2918, "task_loss": 0.16870497167110443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6808202266693115, "epoch": 2.47, "learning_rate": 3.7662721893491124e-05, "loss": 0.8741, "step": 2919, "task_loss": 0.1618366837501526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7896201610565186, "epoch": 2.47, "learning_rate": 3.7658495350803044e-05, "loss": 0.8297, "step": 2920, "task_loss": 0.8454868793487549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5500733852386475, "epoch": 2.47, "learning_rate": 3.7654268808114964e-05, "loss": 0.9446, "step": 2921, "task_loss": 0.7291027903556824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.682433009147644, "epoch": 2.47, "learning_rate": 3.765004226542688e-05, "loss": 1.0764, "step": 2922, "task_loss": 1.1330301761627197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8980833292007446, "epoch": 2.47, "learning_rate": 3.76458157227388e-05, "loss": 0.812, "step": 2923, "task_loss": 0.4154594838619232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6040636301040649, "epoch": 2.47, "learning_rate": 3.7641589180050716e-05, "loss": 0.962, "step": 2924, "task_loss": 1.673122525215149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5187690854072571, "epoch": 2.47, "learning_rate": 3.7637362637362636e-05, "loss": 0.7238, "step": 2925, "task_loss": 0.8250047564506531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8777328133583069, "epoch": 2.47, "learning_rate": 3.763313609467456e-05, "loss": 0.9082, "step": 2926, "task_loss": 1.361264944076538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8328543901443481, "epoch": 2.47, "learning_rate": 3.7628909551986475e-05, "loss": 0.7689, "step": 2927, "task_loss": 0.6862871050834656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40595555305480957, "epoch": 2.47, "learning_rate": 3.7624683009298395e-05, "loss": 0.8602, "step": 2928, "task_loss": 0.661162257194519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4726664423942566, "epoch": 2.48, "learning_rate": 3.7620456466610315e-05, "loss": 0.8362, "step": 2929, "task_loss": 0.3763027489185333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26753073930740356, "epoch": 2.48, "learning_rate": 3.7616229923922234e-05, "loss": 0.5996, "step": 2930, "task_loss": 0.5454975962638855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6390641927719116, "epoch": 2.48, "learning_rate": 3.7612003381234154e-05, "loss": 0.6612, "step": 2931, "task_loss": 0.19475023448467255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2549428939819336, "epoch": 2.48, "learning_rate": 3.7607776838546074e-05, "loss": 1.0256, "step": 2932, "task_loss": 0.9723878502845764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0699330568313599, "epoch": 2.48, "learning_rate": 3.7603550295857994e-05, "loss": 0.8021, "step": 2933, "task_loss": 0.6617919206619263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8121709227561951, "epoch": 2.48, "learning_rate": 3.7599323753169907e-05, "loss": 0.9196, "step": 2934, "task_loss": 1.1484935283660889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5095109343528748, "epoch": 2.48, "learning_rate": 3.7595097210481826e-05, "loss": 0.8336, "step": 2935, "task_loss": 0.039555761963129044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8502115607261658, "epoch": 2.48, "learning_rate": 3.7590870667793746e-05, "loss": 0.7967, "step": 2936, "task_loss": 0.2476097047328949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1505329608917236, "epoch": 2.48, "learning_rate": 3.7586644125105666e-05, "loss": 1.0698, "step": 2937, "task_loss": 0.5473749041557312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39872610569000244, "epoch": 2.48, "learning_rate": 3.7582417582417586e-05, "loss": 0.8293, "step": 2938, "task_loss": 0.06977615505456924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.145108699798584, "epoch": 2.48, "learning_rate": 3.7578191039729505e-05, "loss": 1.1745, "step": 2939, "task_loss": 1.9666355848312378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9140480756759644, "epoch": 2.48, "learning_rate": 3.757396449704142e-05, "loss": 0.7633, "step": 2940, "task_loss": 0.8574177622795105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1026582717895508, "epoch": 2.49, "learning_rate": 3.756973795435334e-05, "loss": 0.8203, "step": 2941, "task_loss": 1.4931082725524902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1289174556732178, "epoch": 2.49, "learning_rate": 3.756551141166526e-05, "loss": 0.9108, "step": 2942, "task_loss": 1.2095997333526611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6390035152435303, "epoch": 2.49, "learning_rate": 3.7561284868977184e-05, "loss": 1.1012, "step": 2943, "task_loss": 0.9534785747528076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6785563230514526, "epoch": 2.49, "learning_rate": 3.75570583262891e-05, "loss": 0.7194, "step": 2944, "task_loss": 0.5434262752532959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2151813507080078, "epoch": 2.49, "learning_rate": 3.755283178360102e-05, "loss": 0.849, "step": 2945, "task_loss": 0.9139522910118103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6218034029006958, "epoch": 2.49, "learning_rate": 3.754860524091294e-05, "loss": 0.8019, "step": 2946, "task_loss": 1.1818902492523193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1467573642730713, "epoch": 2.49, "learning_rate": 3.754437869822485e-05, "loss": 0.8421, "step": 2947, "task_loss": 1.4039177894592285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7867821455001831, "epoch": 2.49, "learning_rate": 3.7540152155536776e-05, "loss": 0.8159, "step": 2948, "task_loss": 0.5303400754928589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9818558692932129, "epoch": 2.49, "learning_rate": 3.7535925612848696e-05, "loss": 0.9777, "step": 2949, "task_loss": 0.9032993912696838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7279269099235535, "epoch": 2.49, "learning_rate": 3.753169907016061e-05, "loss": 0.7193, "step": 2950, "task_loss": 0.6661729216575623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2358300685882568, "epoch": 2.49, "learning_rate": 3.752747252747253e-05, "loss": 1.0239, "step": 2951, "task_loss": 0.7185297608375549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7788811922073364, "epoch": 2.5, "learning_rate": 3.752324598478445e-05, "loss": 1.0897, "step": 2952, "task_loss": 1.0462582111358643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7787549495697021, "epoch": 2.5, "learning_rate": 3.751901944209637e-05, "loss": 0.8269, "step": 2953, "task_loss": 0.3610954284667969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7916620969772339, "epoch": 2.5, "learning_rate": 3.751479289940829e-05, "loss": 0.9498, "step": 2954, "task_loss": 0.3820704221725464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7843740582466125, "epoch": 2.5, "learning_rate": 3.751056635672021e-05, "loss": 0.8804, "step": 2955, "task_loss": 1.3656545877456665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9761601686477661, "epoch": 2.5, "learning_rate": 3.750633981403212e-05, "loss": 1.4578, "step": 2956, "task_loss": 0.8883195519447327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5347753763198853, "epoch": 2.5, "learning_rate": 3.750211327134404e-05, "loss": 1.11, "step": 2957, "task_loss": 1.038547158241272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7983033657073975, "epoch": 2.5, "learning_rate": 3.749788672865596e-05, "loss": 0.7831, "step": 2958, "task_loss": 0.8317264914512634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5666301250457764, "epoch": 2.5, "learning_rate": 3.749366018596788e-05, "loss": 0.659, "step": 2959, "task_loss": 0.2887917160987854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5068851113319397, "epoch": 2.5, "learning_rate": 3.74894336432798e-05, "loss": 0.8101, "step": 2960, "task_loss": 0.15603429079055786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9002972841262817, "epoch": 2.5, "learning_rate": 3.748520710059172e-05, "loss": 0.8178, "step": 2961, "task_loss": 0.998735249042511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5958905220031738, "epoch": 2.5, "learning_rate": 3.748098055790364e-05, "loss": 0.7454, "step": 2962, "task_loss": 1.4648840427398682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6624780893325806, "epoch": 2.5, "learning_rate": 3.747675401521555e-05, "loss": 0.6671, "step": 2963, "task_loss": 0.7569023370742798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.987747073173523, "epoch": 2.51, "learning_rate": 3.747252747252747e-05, "loss": 0.7604, "step": 2964, "task_loss": 0.9567808508872986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8434939384460449, "epoch": 2.51, "learning_rate": 3.74683009298394e-05, "loss": 0.7836, "step": 2965, "task_loss": 0.4334939420223236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5708608627319336, "epoch": 2.51, "learning_rate": 3.746407438715131e-05, "loss": 0.605, "step": 2966, "task_loss": 1.0584946870803833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.516164243221283, "epoch": 2.51, "learning_rate": 3.745984784446323e-05, "loss": 0.69, "step": 2967, "task_loss": 0.7146093845367432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.048117756843567, "epoch": 2.51, "learning_rate": 3.745562130177515e-05, "loss": 0.7239, "step": 2968, "task_loss": 0.7731064558029175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8396795988082886, "epoch": 2.51, "learning_rate": 3.7451394759087064e-05, "loss": 0.8928, "step": 2969, "task_loss": 0.8537657260894775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0115752220153809, "epoch": 2.51, "learning_rate": 3.744716821639899e-05, "loss": 1.0599, "step": 2970, "task_loss": 1.3865740299224854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35942721366882324, "epoch": 2.51, "learning_rate": 3.744294167371091e-05, "loss": 0.8565, "step": 2971, "task_loss": 0.16102173924446106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6482678651809692, "epoch": 2.51, "learning_rate": 3.743871513102282e-05, "loss": 0.8869, "step": 2972, "task_loss": 0.48032504320144653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7061709761619568, "epoch": 2.51, "learning_rate": 3.743448858833474e-05, "loss": 0.8946, "step": 2973, "task_loss": 1.0742160081863403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1326513290405273, "epoch": 2.51, "learning_rate": 3.743026204564666e-05, "loss": 0.8756, "step": 2974, "task_loss": 0.41850972175598145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0805472135543823, "epoch": 2.51, "learning_rate": 3.742603550295858e-05, "loss": 0.9646, "step": 2975, "task_loss": 1.9752739667892456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.032296895980835, "epoch": 2.52, "learning_rate": 3.74218089602705e-05, "loss": 0.8872, "step": 2976, "task_loss": 1.311806559562683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4265739321708679, "epoch": 2.52, "learning_rate": 3.741758241758242e-05, "loss": 0.8001, "step": 2977, "task_loss": 0.1314382553100586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9535980820655823, "epoch": 2.52, "learning_rate": 3.741335587489434e-05, "loss": 0.7161, "step": 2978, "task_loss": 0.531485915184021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.8080883026123047, "epoch": 2.52, "learning_rate": 3.7409129332206254e-05, "loss": 1.0579, "step": 2979, "task_loss": 0.7735041379928589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4333162307739258, "epoch": 2.52, "learning_rate": 3.7404902789518174e-05, "loss": 0.8919, "step": 2980, "task_loss": 1.221501350402832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9503706097602844, "epoch": 2.52, "learning_rate": 3.7400676246830094e-05, "loss": 1.0048, "step": 2981, "task_loss": 0.3086860179901123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.629895806312561, "epoch": 2.52, "learning_rate": 3.739644970414201e-05, "loss": 0.7436, "step": 2982, "task_loss": 0.11467395722866058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7233576774597168, "epoch": 2.52, "learning_rate": 3.739222316145393e-05, "loss": 0.7818, "step": 2983, "task_loss": 0.5050192475318909 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.792982816696167, "epoch": 2.52, "learning_rate": 3.738799661876585e-05, "loss": 0.974, "step": 2984, "task_loss": 0.8836943507194519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5706294775009155, "epoch": 2.52, "learning_rate": 3.7383770076077766e-05, "loss": 0.6983, "step": 2985, "task_loss": 0.3576892614364624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8437615036964417, "epoch": 2.52, "learning_rate": 3.7379543533389686e-05, "loss": 0.7512, "step": 2986, "task_loss": 0.7124462723731995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6693239212036133, "epoch": 2.52, "learning_rate": 3.737531699070161e-05, "loss": 0.8559, "step": 2987, "task_loss": 0.6283877491950989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5808197855949402, "epoch": 2.53, "learning_rate": 3.737109044801353e-05, "loss": 0.7049, "step": 2988, "task_loss": 0.7324851155281067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9331092834472656, "epoch": 2.53, "learning_rate": 3.7366863905325445e-05, "loss": 0.8273, "step": 2989, "task_loss": 0.6153925061225891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7532095909118652, "epoch": 2.53, "learning_rate": 3.7362637362637365e-05, "loss": 0.6142, "step": 2990, "task_loss": 0.6104395389556885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9392920732498169, "epoch": 2.53, "learning_rate": 3.7358410819949284e-05, "loss": 0.815, "step": 2991, "task_loss": 1.2604948282241821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6861684322357178, "epoch": 2.53, "learning_rate": 3.73541842772612e-05, "loss": 0.9983, "step": 2992, "task_loss": 1.0882362127304077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0361250638961792, "epoch": 2.53, "learning_rate": 3.7349957734573124e-05, "loss": 0.9349, "step": 2993, "task_loss": 1.1101155281066895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0074084997177124, "epoch": 2.53, "learning_rate": 3.7345731191885043e-05, "loss": 0.7469, "step": 2994, "task_loss": 1.1420098543167114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3461261987686157, "epoch": 2.53, "learning_rate": 3.7341504649196956e-05, "loss": 0.893, "step": 2995, "task_loss": 0.8898110389709473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6376475095748901, "epoch": 2.53, "learning_rate": 3.7337278106508876e-05, "loss": 0.8891, "step": 2996, "task_loss": 1.161588430404663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.9966105222702026, "epoch": 2.53, "learning_rate": 3.7333051563820796e-05, "loss": 1.0386, "step": 2997, "task_loss": 1.0232625007629395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.771682858467102, "epoch": 2.53, "learning_rate": 3.7328825021132716e-05, "loss": 0.8376, "step": 2998, "task_loss": 0.8959367275238037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.128648281097412, "epoch": 2.53, "learning_rate": 3.7324598478444635e-05, "loss": 1.1336, "step": 2999, "task_loss": 1.08405339717865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.165360927581787, "epoch": 2.54, "learning_rate": 3.7320371935756555e-05, "loss": 0.9728, "step": 3000, "task_loss": 0.2984207570552826 }, { "epoch": 2.54, "eval_accuracy": 0.8767128712871287, "eval_loss": 0.5263227820396423, "eval_runtime": 229.1154, "eval_samples_per_second": 110.206, "eval_steps_per_second": 0.864, "step": 3000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6945322751998901, "epoch": 2.54, "learning_rate": 3.731614539306847e-05, "loss": 0.7383, "step": 3001, "task_loss": 0.339668333530426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1984777450561523, "epoch": 2.54, "learning_rate": 3.731191885038039e-05, "loss": 0.8861, "step": 3002, "task_loss": 1.7218049764633179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6091169118881226, "epoch": 2.54, "learning_rate": 3.730769230769231e-05, "loss": 1.0612, "step": 3003, "task_loss": 0.9124078154563904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3853226900100708, "epoch": 2.54, "learning_rate": 3.7303465765004234e-05, "loss": 0.8872, "step": 3004, "task_loss": 0.09558850526809692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8512963652610779, "epoch": 2.54, "learning_rate": 3.729923922231615e-05, "loss": 0.774, "step": 3005, "task_loss": 1.222814917564392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8113721609115601, "epoch": 2.54, "learning_rate": 3.729501267962807e-05, "loss": 0.8248, "step": 3006, "task_loss": 1.6954820156097412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6979273557662964, "epoch": 2.54, "learning_rate": 3.7290786136939987e-05, "loss": 0.7942, "step": 3007, "task_loss": 0.5400323271751404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8330560922622681, "epoch": 2.54, "learning_rate": 3.72865595942519e-05, "loss": 0.8045, "step": 3008, "task_loss": 0.7397099137306213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8663599491119385, "epoch": 2.54, "learning_rate": 3.728233305156382e-05, "loss": 0.8128, "step": 3009, "task_loss": 1.2098785638809204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8310542106628418, "epoch": 2.54, "learning_rate": 3.7278106508875746e-05, "loss": 0.7465, "step": 3010, "task_loss": 1.1796890497207642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0074374675750732, "epoch": 2.54, "learning_rate": 3.727387996618766e-05, "loss": 0.8066, "step": 3011, "task_loss": 0.8589324355125427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5980572700500488, "epoch": 2.55, "learning_rate": 3.726965342349958e-05, "loss": 0.6101, "step": 3012, "task_loss": 0.5425428152084351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6983373165130615, "epoch": 2.55, "learning_rate": 3.72654268808115e-05, "loss": 0.7695, "step": 3013, "task_loss": 0.658515214920044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.256879210472107, "epoch": 2.55, "learning_rate": 3.726120033812341e-05, "loss": 0.8399, "step": 3014, "task_loss": 1.0945231914520264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.676827073097229, "epoch": 2.55, "learning_rate": 3.725697379543534e-05, "loss": 0.9211, "step": 3015, "task_loss": 0.5654138326644897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7911100387573242, "epoch": 2.55, "learning_rate": 3.725274725274726e-05, "loss": 0.947, "step": 3016, "task_loss": 0.8976695537567139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7871742248535156, "epoch": 2.55, "learning_rate": 3.724852071005918e-05, "loss": 1.097, "step": 3017, "task_loss": 1.3124561309814453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.759321928024292, "epoch": 2.55, "learning_rate": 3.724429416737109e-05, "loss": 0.6181, "step": 3018, "task_loss": 0.601523220539093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.903558075428009, "epoch": 2.55, "learning_rate": 3.724006762468301e-05, "loss": 0.8285, "step": 3019, "task_loss": 1.2055823802947998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6516184210777283, "epoch": 2.55, "learning_rate": 3.723584108199493e-05, "loss": 0.6495, "step": 3020, "task_loss": 0.5704653263092041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6494377851486206, "epoch": 2.55, "learning_rate": 3.723161453930685e-05, "loss": 0.7964, "step": 3021, "task_loss": 0.5845313668251038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0027871131896973, "epoch": 2.55, "learning_rate": 3.722738799661877e-05, "loss": 0.8559, "step": 3022, "task_loss": 1.3712655305862427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6782982349395752, "epoch": 2.56, "learning_rate": 3.722316145393069e-05, "loss": 0.8091, "step": 3023, "task_loss": 1.5369762182235718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9717000126838684, "epoch": 2.56, "learning_rate": 3.72189349112426e-05, "loss": 0.7861, "step": 3024, "task_loss": 0.5329558849334717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9537562131881714, "epoch": 2.56, "learning_rate": 3.721470836855452e-05, "loss": 0.7164, "step": 3025, "task_loss": 0.9149600267410278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7581993937492371, "epoch": 2.56, "learning_rate": 3.721048182586644e-05, "loss": 0.8483, "step": 3026, "task_loss": 0.8229069113731384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8546748757362366, "epoch": 2.56, "learning_rate": 3.720625528317836e-05, "loss": 0.7743, "step": 3027, "task_loss": 0.7973331212997437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5849636793136597, "epoch": 2.56, "learning_rate": 3.720202874049028e-05, "loss": 0.6431, "step": 3028, "task_loss": 0.3173595368862152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41338205337524414, "epoch": 2.56, "learning_rate": 3.71978021978022e-05, "loss": 0.8066, "step": 3029, "task_loss": 0.9902318716049194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.916282057762146, "epoch": 2.56, "learning_rate": 3.7193575655114113e-05, "loss": 0.8332, "step": 3030, "task_loss": 0.9232667684555054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5028959512710571, "epoch": 2.56, "learning_rate": 3.718934911242603e-05, "loss": 0.7522, "step": 3031, "task_loss": 0.7833327651023865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.824494481086731, "epoch": 2.56, "learning_rate": 3.718512256973796e-05, "loss": 0.9899, "step": 3032, "task_loss": 1.2010328769683838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7857167720794678, "epoch": 2.56, "learning_rate": 3.718089602704988e-05, "loss": 0.9092, "step": 3033, "task_loss": 1.1328927278518677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.192674994468689, "epoch": 2.56, "learning_rate": 3.717666948436179e-05, "loss": 0.9829, "step": 3034, "task_loss": 2.6403465270996094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7629928588867188, "epoch": 2.57, "learning_rate": 3.717244294167371e-05, "loss": 0.8618, "step": 3035, "task_loss": 1.6077911853790283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.601716160774231, "epoch": 2.57, "learning_rate": 3.716821639898563e-05, "loss": 0.7449, "step": 3036, "task_loss": 1.882880449295044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33205413818359375, "epoch": 2.57, "learning_rate": 3.716398985629755e-05, "loss": 0.677, "step": 3037, "task_loss": 0.6416762471199036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6947457790374756, "epoch": 2.57, "learning_rate": 3.715976331360947e-05, "loss": 0.6151, "step": 3038, "task_loss": 1.2570137977600098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9590944647789001, "epoch": 2.57, "learning_rate": 3.715553677092139e-05, "loss": 0.7763, "step": 3039, "task_loss": 1.5594828128814697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6918559074401855, "epoch": 2.57, "learning_rate": 3.7151310228233304e-05, "loss": 0.8121, "step": 3040, "task_loss": 1.1324849128723145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9227808713912964, "epoch": 2.57, "learning_rate": 3.7147083685545224e-05, "loss": 0.8574, "step": 3041, "task_loss": 1.1076278686523438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6765317916870117, "epoch": 2.57, "learning_rate": 3.7142857142857143e-05, "loss": 0.8381, "step": 3042, "task_loss": 1.0900061130523682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.9176989793777466, "epoch": 2.57, "learning_rate": 3.713863060016906e-05, "loss": 1.2098, "step": 3043, "task_loss": 1.4418178796768188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1096810102462769, "epoch": 2.57, "learning_rate": 3.713440405748098e-05, "loss": 1.0082, "step": 3044, "task_loss": 0.8642387390136719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0091372728347778, "epoch": 2.57, "learning_rate": 3.71301775147929e-05, "loss": 0.9573, "step": 3045, "task_loss": 0.8251848220825195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8770987391471863, "epoch": 2.57, "learning_rate": 3.712595097210482e-05, "loss": 0.8056, "step": 3046, "task_loss": 0.43860915303230286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8536374568939209, "epoch": 2.58, "learning_rate": 3.7121724429416735e-05, "loss": 0.8514, "step": 3047, "task_loss": 0.5220704674720764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8492565155029297, "epoch": 2.58, "learning_rate": 3.7117497886728655e-05, "loss": 0.9157, "step": 3048, "task_loss": 1.6889747381210327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6147288680076599, "epoch": 2.58, "learning_rate": 3.711327134404058e-05, "loss": 0.7799, "step": 3049, "task_loss": 1.0682823657989502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 2.528697967529297, "epoch": 2.58, "learning_rate": 3.7109044801352495e-05, "loss": 1.3538, "step": 3050, "task_loss": 1.2485682964324951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2987728118896484, "epoch": 2.58, "learning_rate": 3.7104818258664414e-05, "loss": 0.9158, "step": 3051, "task_loss": 1.3291471004486084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.193570852279663, "epoch": 2.58, "learning_rate": 3.7100591715976334e-05, "loss": 0.9452, "step": 3052, "task_loss": 0.9564276933670044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5314393043518066, "epoch": 2.58, "learning_rate": 3.709636517328825e-05, "loss": 0.8745, "step": 3053, "task_loss": 0.9978659152984619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.655224084854126, "epoch": 2.58, "learning_rate": 3.7092138630600174e-05, "loss": 0.7079, "step": 3054, "task_loss": 0.8744780421257019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9647119045257568, "epoch": 2.58, "learning_rate": 3.708791208791209e-05, "loss": 0.97, "step": 3055, "task_loss": 1.4981162548065186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1862475872039795, "epoch": 2.58, "learning_rate": 3.7083685545224006e-05, "loss": 0.7877, "step": 3056, "task_loss": 0.99953693151474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3654099106788635, "epoch": 2.58, "learning_rate": 3.7079459002535926e-05, "loss": 0.6332, "step": 3057, "task_loss": 0.5118232369422913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6309930086135864, "epoch": 2.58, "learning_rate": 3.7075232459847846e-05, "loss": 0.7779, "step": 3058, "task_loss": 1.5678181648254395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3080609440803528, "epoch": 2.59, "learning_rate": 3.7071005917159765e-05, "loss": 0.5809, "step": 3059, "task_loss": 0.07553819566965103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4632693827152252, "epoch": 2.59, "learning_rate": 3.7066779374471685e-05, "loss": 0.7916, "step": 3060, "task_loss": 0.9021651744842529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7074669599533081, "epoch": 2.59, "learning_rate": 3.7062552831783605e-05, "loss": 0.7534, "step": 3061, "task_loss": 0.7439253926277161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7454609870910645, "epoch": 2.59, "learning_rate": 3.7058326289095525e-05, "loss": 0.8225, "step": 3062, "task_loss": 1.0032330751419067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7973410487174988, "epoch": 2.59, "learning_rate": 3.705409974640744e-05, "loss": 0.8569, "step": 3063, "task_loss": 1.0491392612457275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.993553876876831, "epoch": 2.59, "learning_rate": 3.704987320371936e-05, "loss": 1.0378, "step": 3064, "task_loss": 1.0859644412994385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0121476650238037, "epoch": 2.59, "learning_rate": 3.704564666103128e-05, "loss": 0.8503, "step": 3065, "task_loss": 0.9676469564437866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7435230016708374, "epoch": 2.59, "learning_rate": 3.70414201183432e-05, "loss": 0.6776, "step": 3066, "task_loss": 0.31943991780281067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8353585004806519, "epoch": 2.59, "learning_rate": 3.7037193575655117e-05, "loss": 0.7704, "step": 3067, "task_loss": 0.6388733983039856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7831616997718811, "epoch": 2.59, "learning_rate": 3.7032967032967036e-05, "loss": 0.8672, "step": 3068, "task_loss": 0.8999489545822144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7162842154502869, "epoch": 2.59, "learning_rate": 3.702874049027895e-05, "loss": 0.7685, "step": 3069, "task_loss": 1.4781559705734253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1050009727478027, "epoch": 2.59, "learning_rate": 3.702451394759087e-05, "loss": 0.8955, "step": 3070, "task_loss": 1.096408724784851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0002341270446777, "epoch": 2.6, "learning_rate": 3.7020287404902796e-05, "loss": 0.9608, "step": 3071, "task_loss": 1.78266179561615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0992339849472046, "epoch": 2.6, "learning_rate": 3.701606086221471e-05, "loss": 0.9172, "step": 3072, "task_loss": 0.5865455269813538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.010217308998108, "epoch": 2.6, "learning_rate": 3.701183431952663e-05, "loss": 1.0825, "step": 3073, "task_loss": 1.2458832263946533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.130928874015808, "epoch": 2.6, "learning_rate": 3.700760777683855e-05, "loss": 0.9746, "step": 3074, "task_loss": 0.8048464059829712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5870479941368103, "epoch": 2.6, "learning_rate": 3.700338123415047e-05, "loss": 0.6023, "step": 3075, "task_loss": 0.7509044408798218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5582225918769836, "epoch": 2.6, "learning_rate": 3.699915469146239e-05, "loss": 0.7512, "step": 3076, "task_loss": 1.2097479104995728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5444167852401733, "epoch": 2.6, "learning_rate": 3.699492814877431e-05, "loss": 0.9537, "step": 3077, "task_loss": 1.0808779001235962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6360112428665161, "epoch": 2.6, "learning_rate": 3.699070160608623e-05, "loss": 0.677, "step": 3078, "task_loss": 0.7513692378997803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5899777412414551, "epoch": 2.6, "learning_rate": 3.698647506339814e-05, "loss": 0.7193, "step": 3079, "task_loss": 1.1273689270019531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6693968176841736, "epoch": 2.6, "learning_rate": 3.698224852071006e-05, "loss": 0.986, "step": 3080, "task_loss": 0.14810732007026672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1231316328048706, "epoch": 2.6, "learning_rate": 3.697802197802198e-05, "loss": 0.7848, "step": 3081, "task_loss": 0.6202142238616943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8374344110488892, "epoch": 2.6, "learning_rate": 3.69737954353339e-05, "loss": 0.7082, "step": 3082, "task_loss": 1.0106689929962158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4730343818664551, "epoch": 2.61, "learning_rate": 3.696956889264582e-05, "loss": 0.5381, "step": 3083, "task_loss": 0.5265515446662903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9261061549186707, "epoch": 2.61, "learning_rate": 3.696534234995774e-05, "loss": 0.9142, "step": 3084, "task_loss": 1.6120854616165161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8333394527435303, "epoch": 2.61, "learning_rate": 3.696111580726965e-05, "loss": 0.734, "step": 3085, "task_loss": 0.7873744964599609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5093247294425964, "epoch": 2.61, "learning_rate": 3.695688926458157e-05, "loss": 0.6228, "step": 3086, "task_loss": 0.41594576835632324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5116265416145325, "epoch": 2.61, "learning_rate": 3.695266272189349e-05, "loss": 0.7679, "step": 3087, "task_loss": 0.572145402431488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8843633532524109, "epoch": 2.61, "learning_rate": 3.694843617920541e-05, "loss": 0.8531, "step": 3088, "task_loss": 1.0680478811264038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9782377481460571, "epoch": 2.61, "learning_rate": 3.694420963651733e-05, "loss": 0.8237, "step": 3089, "task_loss": 0.8365287184715271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.888074517250061, "epoch": 2.61, "learning_rate": 3.693998309382925e-05, "loss": 0.8532, "step": 3090, "task_loss": 0.8413580656051636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7443833351135254, "epoch": 2.61, "learning_rate": 3.693575655114117e-05, "loss": 0.6207, "step": 3091, "task_loss": 0.16518382728099823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6082497835159302, "epoch": 2.61, "learning_rate": 3.693153000845308e-05, "loss": 0.6569, "step": 3092, "task_loss": 1.1876845359802246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9244512319564819, "epoch": 2.61, "learning_rate": 3.692730346576501e-05, "loss": 0.86, "step": 3093, "task_loss": 1.145851969718933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2092303037643433, "epoch": 2.61, "learning_rate": 3.692307692307693e-05, "loss": 0.8806, "step": 3094, "task_loss": 1.143178105354309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.554682195186615, "epoch": 2.62, "learning_rate": 3.691885038038884e-05, "loss": 1.0177, "step": 3095, "task_loss": 1.0971779823303223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5559208393096924, "epoch": 2.62, "learning_rate": 3.691462383770076e-05, "loss": 0.7795, "step": 3096, "task_loss": 0.5764684081077576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7125136852264404, "epoch": 2.62, "learning_rate": 3.691039729501268e-05, "loss": 0.699, "step": 3097, "task_loss": 0.8046870827674866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.736830472946167, "epoch": 2.62, "learning_rate": 3.69061707523246e-05, "loss": 0.7584, "step": 3098, "task_loss": 1.0614959001541138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6122874617576599, "epoch": 2.62, "learning_rate": 3.690194420963652e-05, "loss": 0.7071, "step": 3099, "task_loss": 0.9087220430374146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7354944944381714, "epoch": 2.62, "learning_rate": 3.689771766694844e-05, "loss": 0.8316, "step": 3100, "task_loss": 0.5507137775421143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0724760293960571, "epoch": 2.62, "learning_rate": 3.6893491124260354e-05, "loss": 0.7883, "step": 3101, "task_loss": 1.7109241485595703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8051074147224426, "epoch": 2.62, "learning_rate": 3.6889264581572274e-05, "loss": 0.9858, "step": 3102, "task_loss": 1.4117804765701294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35728687047958374, "epoch": 2.62, "learning_rate": 3.688503803888419e-05, "loss": 0.6221, "step": 3103, "task_loss": 0.13781708478927612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8510875701904297, "epoch": 2.62, "learning_rate": 3.688081149619611e-05, "loss": 0.7583, "step": 3104, "task_loss": 0.6962308883666992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5689719915390015, "epoch": 2.62, "learning_rate": 3.687658495350803e-05, "loss": 0.841, "step": 3105, "task_loss": 0.19332218170166016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49652373790740967, "epoch": 2.63, "learning_rate": 3.687235841081995e-05, "loss": 0.7055, "step": 3106, "task_loss": 0.3223428428173065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8462597131729126, "epoch": 2.63, "learning_rate": 3.686813186813187e-05, "loss": 0.7816, "step": 3107, "task_loss": 0.37198585271835327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8038350343704224, "epoch": 2.63, "learning_rate": 3.6863905325443785e-05, "loss": 0.8849, "step": 3108, "task_loss": 0.8472291231155396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8491611480712891, "epoch": 2.63, "learning_rate": 3.6859678782755705e-05, "loss": 0.7523, "step": 3109, "task_loss": 1.155708909034729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9595373272895813, "epoch": 2.63, "learning_rate": 3.685545224006763e-05, "loss": 0.7857, "step": 3110, "task_loss": 0.40872299671173096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5706314444541931, "epoch": 2.63, "learning_rate": 3.6851225697379544e-05, "loss": 0.7751, "step": 3111, "task_loss": 1.0269041061401367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8550041913986206, "epoch": 2.63, "learning_rate": 3.6846999154691464e-05, "loss": 0.7515, "step": 3112, "task_loss": 0.665654718875885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4683772921562195, "epoch": 2.63, "learning_rate": 3.6842772612003384e-05, "loss": 0.6526, "step": 3113, "task_loss": 0.5071851015090942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9408434629440308, "epoch": 2.63, "learning_rate": 3.68385460693153e-05, "loss": 0.6859, "step": 3114, "task_loss": 1.917077660560608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3868627548217773, "epoch": 2.63, "learning_rate": 3.6834319526627223e-05, "loss": 1.1517, "step": 3115, "task_loss": 0.8969753384590149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6709154844284058, "epoch": 2.63, "learning_rate": 3.683009298393914e-05, "loss": 0.7277, "step": 3116, "task_loss": 0.8024963736534119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6856657266616821, "epoch": 2.63, "learning_rate": 3.6825866441251056e-05, "loss": 0.7587, "step": 3117, "task_loss": 1.1278657913208008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4586241245269775, "epoch": 2.64, "learning_rate": 3.6821639898562976e-05, "loss": 1.0479, "step": 3118, "task_loss": 2.441871404647827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38083067536354065, "epoch": 2.64, "learning_rate": 3.6817413355874896e-05, "loss": 0.6188, "step": 3119, "task_loss": 0.14536947011947632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.266404926776886, "epoch": 2.64, "learning_rate": 3.6813186813186815e-05, "loss": 0.7, "step": 3120, "task_loss": 0.01973012648522854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4878993034362793, "epoch": 2.64, "learning_rate": 3.6808960270498735e-05, "loss": 0.6608, "step": 3121, "task_loss": 0.45334842801094055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7327946424484253, "epoch": 2.64, "learning_rate": 3.6804733727810655e-05, "loss": 0.7115, "step": 3122, "task_loss": 0.09124201536178589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.828135073184967, "epoch": 2.64, "learning_rate": 3.6800507185122575e-05, "loss": 0.9087, "step": 3123, "task_loss": 0.6044538617134094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6660695672035217, "epoch": 2.64, "learning_rate": 3.679628064243449e-05, "loss": 0.6791, "step": 3124, "task_loss": 0.637639582157135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9629924893379211, "epoch": 2.64, "learning_rate": 3.679205409974641e-05, "loss": 0.8396, "step": 3125, "task_loss": 0.39505887031555176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4094371795654297, "epoch": 2.64, "learning_rate": 3.678782755705833e-05, "loss": 0.812, "step": 3126, "task_loss": 0.2473873496055603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5572298765182495, "epoch": 2.64, "learning_rate": 3.678360101437025e-05, "loss": 0.6219, "step": 3127, "task_loss": 0.7138562798500061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9096237421035767, "epoch": 2.64, "learning_rate": 3.6779374471682166e-05, "loss": 0.9448, "step": 3128, "task_loss": 0.7358607649803162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6246610879898071, "epoch": 2.64, "learning_rate": 3.6775147928994086e-05, "loss": 1.0002, "step": 3129, "task_loss": 0.6177615523338318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8174480199813843, "epoch": 2.65, "learning_rate": 3.6770921386306e-05, "loss": 0.6996, "step": 3130, "task_loss": 1.3445311784744263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7565032243728638, "epoch": 2.65, "learning_rate": 3.676669484361792e-05, "loss": 0.8786, "step": 3131, "task_loss": 0.7241485714912415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5404148101806641, "epoch": 2.65, "learning_rate": 3.6762468300929845e-05, "loss": 0.8668, "step": 3132, "task_loss": 0.15434055030345917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9013053774833679, "epoch": 2.65, "learning_rate": 3.6758241758241765e-05, "loss": 0.8664, "step": 3133, "task_loss": 0.5697086453437805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5654147863388062, "epoch": 2.65, "learning_rate": 3.675401521555368e-05, "loss": 0.8082, "step": 3134, "task_loss": 1.0989187955856323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.82607102394104, "epoch": 2.65, "learning_rate": 3.67497886728656e-05, "loss": 1.0775, "step": 3135, "task_loss": 1.3307024240493774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6478612422943115, "epoch": 2.65, "learning_rate": 3.674556213017752e-05, "loss": 0.699, "step": 3136, "task_loss": 0.6293437480926514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6773472428321838, "epoch": 2.65, "learning_rate": 3.674133558748943e-05, "loss": 0.7931, "step": 3137, "task_loss": 0.808694064617157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3730204403400421, "epoch": 2.65, "learning_rate": 3.673710904480136e-05, "loss": 0.7924, "step": 3138, "task_loss": 0.856404721736908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1635000705718994, "epoch": 2.65, "learning_rate": 3.673288250211328e-05, "loss": 0.9082, "step": 3139, "task_loss": 1.7407945394515991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3419901132583618, "epoch": 2.65, "learning_rate": 3.672865595942519e-05, "loss": 1.0011, "step": 3140, "task_loss": 1.4921413660049438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8441625237464905, "epoch": 2.65, "learning_rate": 3.672442941673711e-05, "loss": 0.8424, "step": 3141, "task_loss": 1.1122411489486694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4720938205718994, "epoch": 2.66, "learning_rate": 3.672020287404903e-05, "loss": 0.816, "step": 3142, "task_loss": 0.20232322812080383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8524852395057678, "epoch": 2.66, "learning_rate": 3.671597633136095e-05, "loss": 0.7595, "step": 3143, "task_loss": 1.4422588348388672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46059346199035645, "epoch": 2.66, "learning_rate": 3.671174978867287e-05, "loss": 0.6115, "step": 3144, "task_loss": 0.1372869461774826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9297634363174438, "epoch": 2.66, "learning_rate": 3.670752324598479e-05, "loss": 0.8001, "step": 3145, "task_loss": 1.1383341550827026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5038588047027588, "epoch": 2.66, "learning_rate": 3.67032967032967e-05, "loss": 0.7782, "step": 3146, "task_loss": 1.093130111694336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1263211965560913, "epoch": 2.66, "learning_rate": 3.669907016060862e-05, "loss": 0.7513, "step": 3147, "task_loss": 1.1021108627319336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26984137296676636, "epoch": 2.66, "learning_rate": 3.669484361792054e-05, "loss": 0.7818, "step": 3148, "task_loss": 0.33520352840423584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8587289452552795, "epoch": 2.66, "learning_rate": 3.669061707523247e-05, "loss": 0.8354, "step": 3149, "task_loss": 0.7077796459197998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.590491533279419, "epoch": 2.66, "learning_rate": 3.668639053254438e-05, "loss": 0.6469, "step": 3150, "task_loss": 1.1975209712982178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8266935348510742, "epoch": 2.66, "learning_rate": 3.66821639898563e-05, "loss": 0.863, "step": 3151, "task_loss": 0.990756630897522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.7702124118804932, "epoch": 2.66, "learning_rate": 3.667793744716822e-05, "loss": 0.9335, "step": 3152, "task_loss": 1.1748255491256714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5679048299789429, "epoch": 2.66, "learning_rate": 3.667371090448013e-05, "loss": 0.5897, "step": 3153, "task_loss": 0.6763615012168884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6074539422988892, "epoch": 2.67, "learning_rate": 3.666948436179205e-05, "loss": 0.6592, "step": 3154, "task_loss": 0.31726667284965515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5907617211341858, "epoch": 2.67, "learning_rate": 3.666525781910398e-05, "loss": 0.9045, "step": 3155, "task_loss": 0.6866695880889893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6058465242385864, "epoch": 2.67, "learning_rate": 3.666103127641589e-05, "loss": 0.9728, "step": 3156, "task_loss": 0.6506273746490479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5541400909423828, "epoch": 2.67, "learning_rate": 3.665680473372781e-05, "loss": 0.5683, "step": 3157, "task_loss": 0.1370454728603363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8852889537811279, "epoch": 2.67, "learning_rate": 3.665257819103973e-05, "loss": 1.1754, "step": 3158, "task_loss": 1.3376331329345703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0697664022445679, "epoch": 2.67, "learning_rate": 3.6648351648351644e-05, "loss": 0.8793, "step": 3159, "task_loss": 1.8662017583847046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9096803665161133, "epoch": 2.67, "learning_rate": 3.664412510566357e-05, "loss": 0.9165, "step": 3160, "task_loss": 0.8790223002433777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2309272289276123, "epoch": 2.67, "learning_rate": 3.663989856297549e-05, "loss": 0.7827, "step": 3161, "task_loss": 1.2024024724960327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8758441209793091, "epoch": 2.67, "learning_rate": 3.663567202028741e-05, "loss": 0.7908, "step": 3162, "task_loss": 0.8387846946716309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1439201831817627, "epoch": 2.67, "learning_rate": 3.6631445477599323e-05, "loss": 0.6686, "step": 3163, "task_loss": 1.2961164712905884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6691940426826477, "epoch": 2.67, "learning_rate": 3.662721893491124e-05, "loss": 0.5438, "step": 3164, "task_loss": 1.4465086460113525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6842027306556702, "epoch": 2.67, "learning_rate": 3.662299239222316e-05, "loss": 0.5976, "step": 3165, "task_loss": 1.9269887208938599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8079041242599487, "epoch": 2.68, "learning_rate": 3.661876584953508e-05, "loss": 0.6748, "step": 3166, "task_loss": 1.124586582183838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0641887187957764, "epoch": 2.68, "learning_rate": 3.6614539306847e-05, "loss": 0.8475, "step": 3167, "task_loss": 1.0529060363769531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5148777365684509, "epoch": 2.68, "learning_rate": 3.661031276415892e-05, "loss": 0.8851, "step": 3168, "task_loss": 0.16598300635814667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9191156625747681, "epoch": 2.68, "learning_rate": 3.6606086221470835e-05, "loss": 0.946, "step": 3169, "task_loss": 0.8727797865867615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.206129550933838, "epoch": 2.68, "learning_rate": 3.6601859678782755e-05, "loss": 0.9677, "step": 3170, "task_loss": 1.1366856098175049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.522931694984436, "epoch": 2.68, "learning_rate": 3.6597633136094675e-05, "loss": 0.8306, "step": 3171, "task_loss": 0.7132547497749329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8306045532226562, "epoch": 2.68, "learning_rate": 3.6593406593406594e-05, "loss": 0.874, "step": 3172, "task_loss": 0.5333722829818726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4200117588043213, "epoch": 2.68, "learning_rate": 3.6589180050718514e-05, "loss": 0.6628, "step": 3173, "task_loss": 0.33211979269981384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6182844638824463, "epoch": 2.68, "learning_rate": 3.6584953508030434e-05, "loss": 0.7342, "step": 3174, "task_loss": 1.420999526977539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8615784645080566, "epoch": 2.68, "learning_rate": 3.658072696534235e-05, "loss": 0.6684, "step": 3175, "task_loss": 1.2398861646652222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35819321870803833, "epoch": 2.68, "learning_rate": 3.6576500422654266e-05, "loss": 0.6991, "step": 3176, "task_loss": 0.8791791200637817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2637050747871399, "epoch": 2.69, "learning_rate": 3.657227387996619e-05, "loss": 0.8037, "step": 3177, "task_loss": 0.8369206786155701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.871681809425354, "epoch": 2.69, "learning_rate": 3.656804733727811e-05, "loss": 0.943, "step": 3178, "task_loss": 2.086747407913208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7784783840179443, "epoch": 2.69, "learning_rate": 3.6563820794590026e-05, "loss": 0.8126, "step": 3179, "task_loss": 1.2934685945510864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6373320817947388, "epoch": 2.69, "learning_rate": 3.6559594251901945e-05, "loss": 0.6403, "step": 3180, "task_loss": 0.36190930008888245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7173376083374023, "epoch": 2.69, "learning_rate": 3.6555367709213865e-05, "loss": 0.8833, "step": 3181, "task_loss": 0.8949460983276367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46418094635009766, "epoch": 2.69, "learning_rate": 3.6551141166525785e-05, "loss": 0.8114, "step": 3182, "task_loss": 0.15461945533752441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5413683652877808, "epoch": 2.69, "learning_rate": 3.6546914623837705e-05, "loss": 0.6256, "step": 3183, "task_loss": 0.36658158898353577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.017490029335022, "epoch": 2.69, "learning_rate": 3.6542688081149624e-05, "loss": 0.6705, "step": 3184, "task_loss": 0.505678117275238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6262437701225281, "epoch": 2.69, "learning_rate": 3.653846153846154e-05, "loss": 0.6796, "step": 3185, "task_loss": 0.4420837461948395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7411134839057922, "epoch": 2.69, "learning_rate": 3.653423499577346e-05, "loss": 0.5585, "step": 3186, "task_loss": 0.5205428004264832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8411450386047363, "epoch": 2.69, "learning_rate": 3.653000845308538e-05, "loss": 0.8477, "step": 3187, "task_loss": 0.38130441308021545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7072612643241882, "epoch": 2.69, "learning_rate": 3.6525781910397297e-05, "loss": 0.8493, "step": 3188, "task_loss": 1.6934208869934082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8371607065200806, "epoch": 2.7, "learning_rate": 3.6521555367709216e-05, "loss": 0.7935, "step": 3189, "task_loss": 0.3760598301887512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7347347736358643, "epoch": 2.7, "learning_rate": 3.6517328825021136e-05, "loss": 0.6921, "step": 3190, "task_loss": 0.7482430934906006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.421387255191803, "epoch": 2.7, "learning_rate": 3.6513102282333056e-05, "loss": 0.703, "step": 3191, "task_loss": 0.4672171175479889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44681286811828613, "epoch": 2.7, "learning_rate": 3.650887573964497e-05, "loss": 0.8332, "step": 3192, "task_loss": 0.6652718782424927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8775023818016052, "epoch": 2.7, "learning_rate": 3.650464919695689e-05, "loss": 0.8674, "step": 3193, "task_loss": 1.6609073877334595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8900438547134399, "epoch": 2.7, "learning_rate": 3.6500422654268815e-05, "loss": 0.8038, "step": 3194, "task_loss": 0.7634229063987732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2499973773956299, "epoch": 2.7, "learning_rate": 3.649619611158073e-05, "loss": 0.7465, "step": 3195, "task_loss": 1.215793490409851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9322706460952759, "epoch": 2.7, "learning_rate": 3.649196956889265e-05, "loss": 0.7743, "step": 3196, "task_loss": 0.750275194644928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9437536001205444, "epoch": 2.7, "learning_rate": 3.648774302620457e-05, "loss": 0.7276, "step": 3197, "task_loss": 0.6326310038566589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7384657263755798, "epoch": 2.7, "learning_rate": 3.648351648351648e-05, "loss": 0.7106, "step": 3198, "task_loss": 0.2647033631801605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5414721965789795, "epoch": 2.7, "learning_rate": 3.647928994082841e-05, "loss": 0.6224, "step": 3199, "task_loss": 0.3809734880924225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.729843020439148, "epoch": 2.7, "learning_rate": 3.647506339814033e-05, "loss": 0.6386, "step": 3200, "task_loss": 1.819659948348999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34137946367263794, "epoch": 2.71, "learning_rate": 3.647083685545224e-05, "loss": 0.6281, "step": 3201, "task_loss": 1.0081596374511719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7048791646957397, "epoch": 2.71, "learning_rate": 3.646661031276416e-05, "loss": 0.749, "step": 3202, "task_loss": 1.5170516967773438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4699389636516571, "epoch": 2.71, "learning_rate": 3.646238377007608e-05, "loss": 0.6961, "step": 3203, "task_loss": 0.861007809638977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7379894256591797, "epoch": 2.71, "learning_rate": 3.6458157227388e-05, "loss": 0.6025, "step": 3204, "task_loss": 0.4677521288394928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6614158749580383, "epoch": 2.71, "learning_rate": 3.645393068469992e-05, "loss": 0.6066, "step": 3205, "task_loss": 0.6612241268157959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.754846453666687, "epoch": 2.71, "learning_rate": 3.644970414201184e-05, "loss": 0.9757, "step": 3206, "task_loss": 1.5634241104125977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3605467081069946, "epoch": 2.71, "learning_rate": 3.644547759932376e-05, "loss": 0.9866, "step": 3207, "task_loss": 0.7468777894973755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5064225196838379, "epoch": 2.71, "learning_rate": 3.644125105663567e-05, "loss": 0.777, "step": 3208, "task_loss": 0.5832480192184448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1239912509918213, "epoch": 2.71, "learning_rate": 3.643702451394759e-05, "loss": 0.9966, "step": 3209, "task_loss": 2.2184693813323975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5373660326004028, "epoch": 2.71, "learning_rate": 3.643279797125951e-05, "loss": 0.6815, "step": 3210, "task_loss": 0.12213116139173508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.779929518699646, "epoch": 2.71, "learning_rate": 3.642857142857143e-05, "loss": 0.7837, "step": 3211, "task_loss": 0.4112778902053833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9307505488395691, "epoch": 2.71, "learning_rate": 3.642434488588335e-05, "loss": 0.9188, "step": 3212, "task_loss": 0.6986552476882935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1074609756469727, "epoch": 2.72, "learning_rate": 3.642011834319527e-05, "loss": 0.7545, "step": 3213, "task_loss": 0.7192916870117188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.714178204536438, "epoch": 2.72, "learning_rate": 3.641589180050718e-05, "loss": 0.8383, "step": 3214, "task_loss": 0.8756546378135681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1989495754241943, "epoch": 2.72, "learning_rate": 3.64116652578191e-05, "loss": 0.8403, "step": 3215, "task_loss": 0.739342451095581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8341965079307556, "epoch": 2.72, "learning_rate": 3.640743871513103e-05, "loss": 0.8019, "step": 3216, "task_loss": 0.7803371548652649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3988834023475647, "epoch": 2.72, "learning_rate": 3.640321217244294e-05, "loss": 0.6493, "step": 3217, "task_loss": 0.3000013828277588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3551662862300873, "epoch": 2.72, "learning_rate": 3.639898562975486e-05, "loss": 0.8931, "step": 3218, "task_loss": 0.38305312395095825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3575408458709717, "epoch": 2.72, "learning_rate": 3.639475908706678e-05, "loss": 0.8913, "step": 3219, "task_loss": 0.46277594566345215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7194536924362183, "epoch": 2.72, "learning_rate": 3.63905325443787e-05, "loss": 0.8869, "step": 3220, "task_loss": 0.46771782636642456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5157691240310669, "epoch": 2.72, "learning_rate": 3.638630600169062e-05, "loss": 0.5827, "step": 3221, "task_loss": 0.5627987384796143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.763983428478241, "epoch": 2.72, "learning_rate": 3.638207945900254e-05, "loss": 0.6576, "step": 3222, "task_loss": 1.1351430416107178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0026105642318726, "epoch": 2.72, "learning_rate": 3.637785291631446e-05, "loss": 0.821, "step": 3223, "task_loss": 1.5181260108947754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0317902565002441, "epoch": 2.72, "learning_rate": 3.637362637362637e-05, "loss": 1.0015, "step": 3224, "task_loss": 1.0426018238067627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7107886075973511, "epoch": 2.73, "learning_rate": 3.636939983093829e-05, "loss": 0.7966, "step": 3225, "task_loss": 0.211040198802948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9026594161987305, "epoch": 2.73, "learning_rate": 3.636517328825021e-05, "loss": 0.9178, "step": 3226, "task_loss": 1.6117362976074219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0398650169372559, "epoch": 2.73, "learning_rate": 3.636094674556213e-05, "loss": 0.8409, "step": 3227, "task_loss": 1.1032884120941162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7302571535110474, "epoch": 2.73, "learning_rate": 3.635672020287405e-05, "loss": 1.0826, "step": 3228, "task_loss": 0.989496648311615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4171563982963562, "epoch": 2.73, "learning_rate": 3.635249366018597e-05, "loss": 0.6483, "step": 3229, "task_loss": 0.3753688335418701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5672946572303772, "epoch": 2.73, "learning_rate": 3.6348267117497885e-05, "loss": 0.8951, "step": 3230, "task_loss": 1.9011656045913696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8212553858757019, "epoch": 2.73, "learning_rate": 3.6344040574809805e-05, "loss": 0.8339, "step": 3231, "task_loss": 0.9959295988082886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.012403130531311, "epoch": 2.73, "learning_rate": 3.6339814032121724e-05, "loss": 0.8895, "step": 3232, "task_loss": 1.1733182668685913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5102289915084839, "epoch": 2.73, "learning_rate": 3.6335587489433644e-05, "loss": 0.6307, "step": 3233, "task_loss": 0.38015079498291016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6747186779975891, "epoch": 2.73, "learning_rate": 3.6331360946745564e-05, "loss": 0.7663, "step": 3234, "task_loss": 0.34005922079086304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8523671627044678, "epoch": 2.73, "learning_rate": 3.6327134404057484e-05, "loss": 0.913, "step": 3235, "task_loss": 0.8009073734283447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8001285791397095, "epoch": 2.73, "learning_rate": 3.63229078613694e-05, "loss": 0.7568, "step": 3236, "task_loss": 0.5754767656326294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5291897058486938, "epoch": 2.74, "learning_rate": 3.6318681318681316e-05, "loss": 0.5505, "step": 3237, "task_loss": 0.32463839650154114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.691354513168335, "epoch": 2.74, "learning_rate": 3.631445477599324e-05, "loss": 0.71, "step": 3238, "task_loss": 0.7392473816871643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5998837947845459, "epoch": 2.74, "learning_rate": 3.631022823330516e-05, "loss": 0.6833, "step": 3239, "task_loss": 0.45310693979263306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5507205128669739, "epoch": 2.74, "learning_rate": 3.6306001690617076e-05, "loss": 0.7251, "step": 3240, "task_loss": 1.869431972503662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6730242967605591, "epoch": 2.74, "learning_rate": 3.6301775147928995e-05, "loss": 0.6456, "step": 3241, "task_loss": 0.3893422484397888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7975687980651855, "epoch": 2.74, "learning_rate": 3.6297548605240915e-05, "loss": 0.6181, "step": 3242, "task_loss": 0.5642158389091492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4054068922996521, "epoch": 2.74, "learning_rate": 3.6293322062552835e-05, "loss": 0.5695, "step": 3243, "task_loss": 0.4393438696861267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1712977886199951, "epoch": 2.74, "learning_rate": 3.6289095519864754e-05, "loss": 1.0302, "step": 3244, "task_loss": 0.899411141872406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5320630073547363, "epoch": 2.74, "learning_rate": 3.6284868977176674e-05, "loss": 0.8687, "step": 3245, "task_loss": 0.998672366142273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9773420095443726, "epoch": 2.74, "learning_rate": 3.628064243448859e-05, "loss": 0.8313, "step": 3246, "task_loss": 1.0046602487564087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0655792951583862, "epoch": 2.74, "learning_rate": 3.627641589180051e-05, "loss": 0.8468, "step": 3247, "task_loss": 0.8440394997596741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7294139266014099, "epoch": 2.75, "learning_rate": 3.627218934911243e-05, "loss": 0.7729, "step": 3248, "task_loss": 0.6787798404693604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6052408218383789, "epoch": 2.75, "learning_rate": 3.6267962806424346e-05, "loss": 0.6962, "step": 3249, "task_loss": 0.303617388010025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7759650349617004, "epoch": 2.75, "learning_rate": 3.6263736263736266e-05, "loss": 0.6636, "step": 3250, "task_loss": 0.6617615818977356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1901512145996094, "epoch": 2.75, "learning_rate": 3.6259509721048186e-05, "loss": 0.9352, "step": 3251, "task_loss": 0.8724523782730103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2492327690124512, "epoch": 2.75, "learning_rate": 3.6255283178360106e-05, "loss": 0.9276, "step": 3252, "task_loss": 1.086970329284668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43723583221435547, "epoch": 2.75, "learning_rate": 3.625105663567202e-05, "loss": 0.702, "step": 3253, "task_loss": 0.5267838835716248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8055061101913452, "epoch": 2.75, "learning_rate": 3.624683009298394e-05, "loss": 0.8433, "step": 3254, "task_loss": 1.1333932876586914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2522903680801392, "epoch": 2.75, "learning_rate": 3.6242603550295865e-05, "loss": 0.9854, "step": 3255, "task_loss": 0.5626524090766907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.663620114326477, "epoch": 2.75, "learning_rate": 3.623837700760778e-05, "loss": 0.721, "step": 3256, "task_loss": 1.1029486656188965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6475971937179565, "epoch": 2.75, "learning_rate": 3.62341504649197e-05, "loss": 0.5288, "step": 3257, "task_loss": 0.6983718872070312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8854166865348816, "epoch": 2.75, "learning_rate": 3.622992392223162e-05, "loss": 0.7067, "step": 3258, "task_loss": 0.9810651540756226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7401001453399658, "epoch": 2.75, "learning_rate": 3.622569737954353e-05, "loss": 0.8089, "step": 3259, "task_loss": 1.2944881916046143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0155483484268188, "epoch": 2.76, "learning_rate": 3.622147083685546e-05, "loss": 0.8513, "step": 3260, "task_loss": 0.6485295295715332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.96612548828125, "epoch": 2.76, "learning_rate": 3.6217244294167376e-05, "loss": 0.8414, "step": 3261, "task_loss": 0.5453760027885437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9183001518249512, "epoch": 2.76, "learning_rate": 3.621301775147929e-05, "loss": 0.7371, "step": 3262, "task_loss": 1.088436484336853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5886558294296265, "epoch": 2.76, "learning_rate": 3.620879120879121e-05, "loss": 0.6881, "step": 3263, "task_loss": 0.9603484272956848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5060992240905762, "epoch": 2.76, "learning_rate": 3.620456466610313e-05, "loss": 0.6228, "step": 3264, "task_loss": 0.31005531549453735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8019952178001404, "epoch": 2.76, "learning_rate": 3.620033812341505e-05, "loss": 0.666, "step": 3265, "task_loss": 1.4603077173233032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7681384086608887, "epoch": 2.76, "learning_rate": 3.619611158072697e-05, "loss": 0.8473, "step": 3266, "task_loss": 0.13752584159374237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7811859846115112, "epoch": 2.76, "learning_rate": 3.619188503803889e-05, "loss": 0.7979, "step": 3267, "task_loss": 0.6954386234283447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1156175136566162, "epoch": 2.76, "learning_rate": 3.618765849535081e-05, "loss": 0.9066, "step": 3268, "task_loss": 1.5043240785598755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1991462707519531, "epoch": 2.76, "learning_rate": 3.618343195266272e-05, "loss": 0.9056, "step": 3269, "task_loss": 1.1476621627807617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7136577367782593, "epoch": 2.76, "learning_rate": 3.617920540997464e-05, "loss": 0.8466, "step": 3270, "task_loss": 0.47285696864128113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7878576517105103, "epoch": 2.76, "learning_rate": 3.617497886728656e-05, "loss": 0.9725, "step": 3271, "task_loss": 0.29310181736946106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8063193559646606, "epoch": 2.77, "learning_rate": 3.617075232459848e-05, "loss": 0.6592, "step": 3272, "task_loss": 0.8167861700057983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5706242918968201, "epoch": 2.77, "learning_rate": 3.61665257819104e-05, "loss": 0.6352, "step": 3273, "task_loss": 1.3761039972305298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6302134990692139, "epoch": 2.77, "learning_rate": 3.616229923922232e-05, "loss": 0.616, "step": 3274, "task_loss": 0.3765747547149658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47885486483573914, "epoch": 2.77, "learning_rate": 3.615807269653423e-05, "loss": 0.6738, "step": 3275, "task_loss": 0.09418342262506485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8178308606147766, "epoch": 2.77, "learning_rate": 3.615384615384615e-05, "loss": 0.8208, "step": 3276, "task_loss": 1.2406654357910156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7907090187072754, "epoch": 2.77, "learning_rate": 3.614961961115808e-05, "loss": 0.7618, "step": 3277, "task_loss": 0.3515041172504425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.551092803478241, "epoch": 2.77, "learning_rate": 3.614539306847e-05, "loss": 0.6284, "step": 3278, "task_loss": 0.6256594657897949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7914831638336182, "epoch": 2.77, "learning_rate": 3.614116652578191e-05, "loss": 0.7499, "step": 3279, "task_loss": 1.0081613063812256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8618583679199219, "epoch": 2.77, "learning_rate": 3.613693998309383e-05, "loss": 1.0505, "step": 3280, "task_loss": 0.5456560254096985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6455854177474976, "epoch": 2.77, "learning_rate": 3.613271344040575e-05, "loss": 0.8675, "step": 3281, "task_loss": 1.8859734535217285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7248468399047852, "epoch": 2.77, "learning_rate": 3.6128486897717664e-05, "loss": 0.9598, "step": 3282, "task_loss": 1.896436095237732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39074623584747314, "epoch": 2.77, "learning_rate": 3.612426035502959e-05, "loss": 0.709, "step": 3283, "task_loss": 0.208869069814682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5402849316596985, "epoch": 2.78, "learning_rate": 3.612003381234151e-05, "loss": 0.7581, "step": 3284, "task_loss": 0.8791276216506958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6216833591461182, "epoch": 2.78, "learning_rate": 3.611580726965342e-05, "loss": 0.9744, "step": 3285, "task_loss": 0.6507049202919006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9934566020965576, "epoch": 2.78, "learning_rate": 3.611158072696534e-05, "loss": 1.1314, "step": 3286, "task_loss": 0.9094664454460144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0117695331573486, "epoch": 2.78, "learning_rate": 3.610735418427726e-05, "loss": 0.7291, "step": 3287, "task_loss": 0.9870509505271912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5655584335327148, "epoch": 2.78, "learning_rate": 3.610312764158918e-05, "loss": 0.8105, "step": 3288, "task_loss": 0.7977146506309509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6419283151626587, "epoch": 2.78, "learning_rate": 3.60989010989011e-05, "loss": 0.7212, "step": 3289, "task_loss": 0.22209587693214417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7782756090164185, "epoch": 2.78, "learning_rate": 3.609467455621302e-05, "loss": 0.7048, "step": 3290, "task_loss": 0.5902601480484009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7808916568756104, "epoch": 2.78, "learning_rate": 3.6090448013524935e-05, "loss": 0.9701, "step": 3291, "task_loss": 0.38256022334098816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.695368766784668, "epoch": 2.78, "learning_rate": 3.6086221470836855e-05, "loss": 0.7582, "step": 3292, "task_loss": 0.7444636821746826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5389489531517029, "epoch": 2.78, "learning_rate": 3.6081994928148774e-05, "loss": 0.7329, "step": 3293, "task_loss": 0.7681195139884949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.747118353843689, "epoch": 2.78, "learning_rate": 3.6077768385460694e-05, "loss": 0.7452, "step": 3294, "task_loss": 0.6000422239303589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5363589525222778, "epoch": 2.78, "learning_rate": 3.6073541842772614e-05, "loss": 0.7354, "step": 3295, "task_loss": 0.9708808660507202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.915900707244873, "epoch": 2.79, "learning_rate": 3.6069315300084533e-05, "loss": 0.7257, "step": 3296, "task_loss": 0.6956742405891418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0911142826080322, "epoch": 2.79, "learning_rate": 3.606508875739645e-05, "loss": 0.7316, "step": 3297, "task_loss": 1.4301362037658691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8773866295814514, "epoch": 2.79, "learning_rate": 3.6060862214708366e-05, "loss": 0.6273, "step": 3298, "task_loss": 0.9789811968803406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6220656633377075, "epoch": 2.79, "learning_rate": 3.6056635672020286e-05, "loss": 0.7831, "step": 3299, "task_loss": 0.2030237913131714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38913747668266296, "epoch": 2.79, "learning_rate": 3.605240912933221e-05, "loss": 0.745, "step": 3300, "task_loss": 1.2848995923995972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6990540027618408, "epoch": 2.79, "learning_rate": 3.6048182586644125e-05, "loss": 0.9037, "step": 3301, "task_loss": 0.8262940645217896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7512128353118896, "epoch": 2.79, "learning_rate": 3.6043956043956045e-05, "loss": 0.8033, "step": 3302, "task_loss": 0.40923023223876953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7678865194320679, "epoch": 2.79, "learning_rate": 3.6039729501267965e-05, "loss": 0.9258, "step": 3303, "task_loss": 0.954060435295105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0199294090270996, "epoch": 2.79, "learning_rate": 3.603550295857988e-05, "loss": 0.9878, "step": 3304, "task_loss": 1.248643398284912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45711737871170044, "epoch": 2.79, "learning_rate": 3.6031276415891804e-05, "loss": 0.8528, "step": 3305, "task_loss": 0.7803612351417542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.432719886302948, "epoch": 2.79, "learning_rate": 3.6027049873203724e-05, "loss": 0.5504, "step": 3306, "task_loss": 0.5674700736999512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6896347999572754, "epoch": 2.79, "learning_rate": 3.6022823330515644e-05, "loss": 0.7025, "step": 3307, "task_loss": 0.706199586391449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5712430477142334, "epoch": 2.8, "learning_rate": 3.601859678782756e-05, "loss": 0.7422, "step": 3308, "task_loss": 0.8810210824012756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.841557502746582, "epoch": 2.8, "learning_rate": 3.6014370245139477e-05, "loss": 0.9066, "step": 3309, "task_loss": 1.0013234615325928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.673639178276062, "epoch": 2.8, "learning_rate": 3.6010143702451396e-05, "loss": 0.7519, "step": 3310, "task_loss": 1.6614902019500732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0699958801269531, "epoch": 2.8, "learning_rate": 3.6005917159763316e-05, "loss": 0.8013, "step": 3311, "task_loss": 1.4993906021118164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.025099515914917, "epoch": 2.8, "learning_rate": 3.6001690617075236e-05, "loss": 0.84, "step": 3312, "task_loss": 0.6921936273574829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5464345216751099, "epoch": 2.8, "learning_rate": 3.5997464074387155e-05, "loss": 0.5862, "step": 3313, "task_loss": 0.3845492899417877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7522363662719727, "epoch": 2.8, "learning_rate": 3.599323753169907e-05, "loss": 0.7042, "step": 3314, "task_loss": 1.099902629852295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5170813202857971, "epoch": 2.8, "learning_rate": 3.598901098901099e-05, "loss": 0.5995, "step": 3315, "task_loss": 0.14206846058368683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8377299308776855, "epoch": 2.8, "learning_rate": 3.598478444632291e-05, "loss": 0.8144, "step": 3316, "task_loss": 0.6457552909851074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7462509274482727, "epoch": 2.8, "learning_rate": 3.598055790363483e-05, "loss": 0.6059, "step": 3317, "task_loss": 0.8252855539321899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3696131110191345, "epoch": 2.8, "learning_rate": 3.597633136094675e-05, "loss": 0.7073, "step": 3318, "task_loss": 0.29601961374282837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3777548670768738, "epoch": 2.81, "learning_rate": 3.597210481825867e-05, "loss": 0.6954, "step": 3319, "task_loss": 0.8024699687957764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9105001091957092, "epoch": 2.81, "learning_rate": 3.596787827557058e-05, "loss": 0.6867, "step": 3320, "task_loss": 0.9338986277580261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5200917720794678, "epoch": 2.81, "learning_rate": 3.59636517328825e-05, "loss": 0.8007, "step": 3321, "task_loss": 0.6254926919937134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20663976669311523, "epoch": 2.81, "learning_rate": 3.5959425190194426e-05, "loss": 0.5437, "step": 3322, "task_loss": 0.04157170653343201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5326728820800781, "epoch": 2.81, "learning_rate": 3.5955198647506346e-05, "loss": 0.5746, "step": 3323, "task_loss": 0.3979988694190979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6816906332969666, "epoch": 2.81, "learning_rate": 3.595097210481826e-05, "loss": 0.8078, "step": 3324, "task_loss": 0.4956477880477905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7264906764030457, "epoch": 2.81, "learning_rate": 3.594674556213018e-05, "loss": 0.706, "step": 3325, "task_loss": 0.3928796052932739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7272849082946777, "epoch": 2.81, "learning_rate": 3.59425190194421e-05, "loss": 0.6891, "step": 3326, "task_loss": 0.6252204179763794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22119112312793732, "epoch": 2.81, "learning_rate": 3.593829247675402e-05, "loss": 0.5689, "step": 3327, "task_loss": 0.45182284712791443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8769271373748779, "epoch": 2.81, "learning_rate": 3.593406593406594e-05, "loss": 0.6481, "step": 3328, "task_loss": 0.7757990956306458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5670448541641235, "epoch": 2.81, "learning_rate": 3.592983939137786e-05, "loss": 0.7003, "step": 3329, "task_loss": 1.3578851222991943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5573376417160034, "epoch": 2.81, "learning_rate": 3.592561284868977e-05, "loss": 0.9923, "step": 3330, "task_loss": 0.48840057849884033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5479972958564758, "epoch": 2.82, "learning_rate": 3.592138630600169e-05, "loss": 0.757, "step": 3331, "task_loss": 0.4727529287338257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.484941840171814, "epoch": 2.82, "learning_rate": 3.591715976331361e-05, "loss": 0.8129, "step": 3332, "task_loss": 1.3712893724441528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9272233247756958, "epoch": 2.82, "learning_rate": 3.591293322062553e-05, "loss": 0.8068, "step": 3333, "task_loss": 1.2493752241134644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7020160555839539, "epoch": 2.82, "learning_rate": 3.590870667793745e-05, "loss": 0.6155, "step": 3334, "task_loss": 0.7037437558174133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7469232678413391, "epoch": 2.82, "learning_rate": 3.590448013524937e-05, "loss": 0.6887, "step": 3335, "task_loss": 0.8930742740631104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22888050973415375, "epoch": 2.82, "learning_rate": 3.590025359256129e-05, "loss": 0.7191, "step": 3336, "task_loss": 0.0376892127096653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7355809211730957, "epoch": 2.82, "learning_rate": 3.58960270498732e-05, "loss": 0.8127, "step": 3337, "task_loss": 0.6290755271911621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6525412797927856, "epoch": 2.82, "learning_rate": 3.589180050718512e-05, "loss": 0.9723, "step": 3338, "task_loss": 0.6600795984268188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.577083945274353, "epoch": 2.82, "learning_rate": 3.588757396449705e-05, "loss": 0.7216, "step": 3339, "task_loss": 0.5928191542625427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4327682554721832, "epoch": 2.82, "learning_rate": 3.588334742180896e-05, "loss": 0.6027, "step": 3340, "task_loss": 0.6199809312820435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29940110445022583, "epoch": 2.82, "learning_rate": 3.587912087912088e-05, "loss": 0.7863, "step": 3341, "task_loss": 0.24301491677761078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9664512872695923, "epoch": 2.82, "learning_rate": 3.58748943364328e-05, "loss": 0.7198, "step": 3342, "task_loss": 1.0228712558746338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4342517852783203, "epoch": 2.83, "learning_rate": 3.5870667793744714e-05, "loss": 0.9083, "step": 3343, "task_loss": 0.7765054702758789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2263702005147934, "epoch": 2.83, "learning_rate": 3.586644125105664e-05, "loss": 0.5907, "step": 3344, "task_loss": 0.33594268560409546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7420411109924316, "epoch": 2.83, "learning_rate": 3.586221470836856e-05, "loss": 0.7812, "step": 3345, "task_loss": 0.3694157898426056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.818916916847229, "epoch": 2.83, "learning_rate": 3.585798816568047e-05, "loss": 0.7194, "step": 3346, "task_loss": 0.5279682278633118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3660365343093872, "epoch": 2.83, "learning_rate": 3.585376162299239e-05, "loss": 0.601, "step": 3347, "task_loss": 0.21023991703987122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8920690417289734, "epoch": 2.83, "learning_rate": 3.584953508030431e-05, "loss": 0.678, "step": 3348, "task_loss": 1.6149805784225464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.581025242805481, "epoch": 2.83, "learning_rate": 3.584530853761623e-05, "loss": 0.6872, "step": 3349, "task_loss": 1.0851932764053345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9683977365493774, "epoch": 2.83, "learning_rate": 3.584108199492815e-05, "loss": 0.6892, "step": 3350, "task_loss": 1.5431760549545288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6526146531105042, "epoch": 2.83, "learning_rate": 3.583685545224007e-05, "loss": 0.8414, "step": 3351, "task_loss": 1.0233675241470337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6186125874519348, "epoch": 2.83, "learning_rate": 3.583262890955199e-05, "loss": 0.6547, "step": 3352, "task_loss": 1.1729212999343872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9383249282836914, "epoch": 2.83, "learning_rate": 3.5828402366863904e-05, "loss": 0.6468, "step": 3353, "task_loss": 0.399311900138855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6464821696281433, "epoch": 2.83, "learning_rate": 3.5824175824175824e-05, "loss": 0.8103, "step": 3354, "task_loss": 1.1796218156814575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1304669380187988, "epoch": 2.84, "learning_rate": 3.5819949281487744e-05, "loss": 0.8737, "step": 3355, "task_loss": 1.3280930519104004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9701012372970581, "epoch": 2.84, "learning_rate": 3.5815722738799664e-05, "loss": 0.8338, "step": 3356, "task_loss": 0.9874003529548645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5631482601165771, "epoch": 2.84, "learning_rate": 3.581149619611158e-05, "loss": 0.7308, "step": 3357, "task_loss": 0.5956993103027344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5462734699249268, "epoch": 2.84, "learning_rate": 3.58072696534235e-05, "loss": 0.5028, "step": 3358, "task_loss": 0.6815320253372192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9117316007614136, "epoch": 2.84, "learning_rate": 3.5803043110735416e-05, "loss": 0.9044, "step": 3359, "task_loss": 1.6148408651351929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7821139097213745, "epoch": 2.84, "learning_rate": 3.5798816568047336e-05, "loss": 0.9078, "step": 3360, "task_loss": 1.0132858753204346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3394583761692047, "epoch": 2.84, "learning_rate": 3.579459002535926e-05, "loss": 0.7167, "step": 3361, "task_loss": 0.41234830021858215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4090193510055542, "epoch": 2.84, "learning_rate": 3.5790363482671175e-05, "loss": 0.689, "step": 3362, "task_loss": 0.42703545093536377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9072380065917969, "epoch": 2.84, "learning_rate": 3.5786136939983095e-05, "loss": 0.9806, "step": 3363, "task_loss": 0.5319318771362305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8753175735473633, "epoch": 2.84, "learning_rate": 3.5781910397295015e-05, "loss": 0.8924, "step": 3364, "task_loss": 1.013991355895996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3981405198574066, "epoch": 2.84, "learning_rate": 3.5777683854606934e-05, "loss": 0.7975, "step": 3365, "task_loss": 0.9499750733375549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5115039348602295, "epoch": 2.84, "learning_rate": 3.5773457311918854e-05, "loss": 0.6109, "step": 3366, "task_loss": 1.3677561283111572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.64418625831604, "epoch": 2.85, "learning_rate": 3.5769230769230774e-05, "loss": 0.9271, "step": 3367, "task_loss": 1.1890087127685547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6919095516204834, "epoch": 2.85, "learning_rate": 3.5765004226542694e-05, "loss": 0.7689, "step": 3368, "task_loss": 1.3207236528396606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5973398685455322, "epoch": 2.85, "learning_rate": 3.576077768385461e-05, "loss": 0.6389, "step": 3369, "task_loss": 0.20922210812568665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.735552191734314, "epoch": 2.85, "learning_rate": 3.5756551141166526e-05, "loss": 0.8304, "step": 3370, "task_loss": 0.8373743891716003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6899963617324829, "epoch": 2.85, "learning_rate": 3.5752324598478446e-05, "loss": 0.6597, "step": 3371, "task_loss": 0.9547471404075623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0091354846954346, "epoch": 2.85, "learning_rate": 3.5748098055790366e-05, "loss": 0.8593, "step": 3372, "task_loss": 0.6888219118118286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5795108079910278, "epoch": 2.85, "learning_rate": 3.5743871513102286e-05, "loss": 0.4736, "step": 3373, "task_loss": 0.6050034165382385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7279648184776306, "epoch": 2.85, "learning_rate": 3.5739644970414205e-05, "loss": 0.7487, "step": 3374, "task_loss": 0.7485307455062866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7954450845718384, "epoch": 2.85, "learning_rate": 3.573541842772612e-05, "loss": 0.6612, "step": 3375, "task_loss": 0.5137763619422913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5496881008148193, "epoch": 2.85, "learning_rate": 3.573119188503804e-05, "loss": 0.709, "step": 3376, "task_loss": 1.0196874141693115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0830185413360596, "epoch": 2.85, "learning_rate": 3.572696534234996e-05, "loss": 0.8297, "step": 3377, "task_loss": 0.9782475829124451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5371267795562744, "epoch": 2.85, "learning_rate": 3.572273879966188e-05, "loss": 0.5906, "step": 3378, "task_loss": 0.5706921815872192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6014994382858276, "epoch": 2.86, "learning_rate": 3.57185122569738e-05, "loss": 0.9173, "step": 3379, "task_loss": 0.9167914986610413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6044281721115112, "epoch": 2.86, "learning_rate": 3.571428571428572e-05, "loss": 0.7661, "step": 3380, "task_loss": 0.6776968836784363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4123568534851074, "epoch": 2.86, "learning_rate": 3.571005917159764e-05, "loss": 0.6166, "step": 3381, "task_loss": 0.21269316971302032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5059281587600708, "epoch": 2.86, "learning_rate": 3.570583262890955e-05, "loss": 0.8724, "step": 3382, "task_loss": 0.5654011964797974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6949788331985474, "epoch": 2.86, "learning_rate": 3.5701606086221476e-05, "loss": 0.7245, "step": 3383, "task_loss": 0.6039469838142395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1372690200805664, "epoch": 2.86, "learning_rate": 3.5697379543533396e-05, "loss": 0.855, "step": 3384, "task_loss": 1.0438274145126343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5677127242088318, "epoch": 2.86, "learning_rate": 3.569315300084531e-05, "loss": 0.8538, "step": 3385, "task_loss": 1.0312906503677368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7746845483779907, "epoch": 2.86, "learning_rate": 3.568892645815723e-05, "loss": 0.759, "step": 3386, "task_loss": 0.7188562154769897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.209460735321045, "epoch": 2.86, "learning_rate": 3.568469991546915e-05, "loss": 0.8257, "step": 3387, "task_loss": 1.9700309038162231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5082601308822632, "epoch": 2.86, "learning_rate": 3.568047337278107e-05, "loss": 0.7571, "step": 3388, "task_loss": 0.8950508832931519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6700512170791626, "epoch": 2.86, "learning_rate": 3.567624683009299e-05, "loss": 0.6723, "step": 3389, "task_loss": 0.5086400508880615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7176834344863892, "epoch": 2.87, "learning_rate": 3.567202028740491e-05, "loss": 0.6147, "step": 3390, "task_loss": 0.8759731650352478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7018980979919434, "epoch": 2.87, "learning_rate": 3.566779374471682e-05, "loss": 0.6998, "step": 3391, "task_loss": 1.050652265548706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7234511375427246, "epoch": 2.87, "learning_rate": 3.566356720202874e-05, "loss": 0.8682, "step": 3392, "task_loss": 0.8712239265441895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.788881778717041, "epoch": 2.87, "learning_rate": 3.565934065934066e-05, "loss": 0.7917, "step": 3393, "task_loss": 1.0008960962295532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8022782802581787, "epoch": 2.87, "learning_rate": 3.565511411665258e-05, "loss": 0.596, "step": 3394, "task_loss": 0.4843157231807709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5188448429107666, "epoch": 2.87, "learning_rate": 3.56508875739645e-05, "loss": 0.8625, "step": 3395, "task_loss": 0.46342137455940247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7050491571426392, "epoch": 2.87, "learning_rate": 3.564666103127642e-05, "loss": 0.6855, "step": 3396, "task_loss": 0.6222397089004517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.710543692111969, "epoch": 2.87, "learning_rate": 3.564243448858834e-05, "loss": 0.7413, "step": 3397, "task_loss": 0.44308194518089294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5758408904075623, "epoch": 2.87, "learning_rate": 3.563820794590025e-05, "loss": 0.6583, "step": 3398, "task_loss": 0.49602827429771423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6584762334823608, "epoch": 2.87, "learning_rate": 3.563398140321217e-05, "loss": 0.7451, "step": 3399, "task_loss": 1.189739465713501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.647689700126648, "epoch": 2.87, "learning_rate": 3.56297548605241e-05, "loss": 0.6564, "step": 3400, "task_loss": 1.050286889076233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7365431785583496, "epoch": 2.87, "learning_rate": 3.562552831783601e-05, "loss": 0.5397, "step": 3401, "task_loss": 0.9292466044425964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6733627319335938, "epoch": 2.88, "learning_rate": 3.562130177514793e-05, "loss": 0.7334, "step": 3402, "task_loss": 0.638867199420929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7490792274475098, "epoch": 2.88, "learning_rate": 3.561707523245985e-05, "loss": 0.8545, "step": 3403, "task_loss": 0.9101022481918335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7391672730445862, "epoch": 2.88, "learning_rate": 3.5612848689771764e-05, "loss": 0.6606, "step": 3404, "task_loss": 1.180227518081665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5787700414657593, "epoch": 2.88, "learning_rate": 3.560862214708368e-05, "loss": 0.6409, "step": 3405, "task_loss": 0.5392124652862549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.558342456817627, "epoch": 2.88, "learning_rate": 3.560439560439561e-05, "loss": 0.6974, "step": 3406, "task_loss": 0.6289198398590088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5504965782165527, "epoch": 2.88, "learning_rate": 3.560016906170752e-05, "loss": 0.7247, "step": 3407, "task_loss": 0.9255884885787964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7997416257858276, "epoch": 2.88, "learning_rate": 3.559594251901944e-05, "loss": 0.7936, "step": 3408, "task_loss": 0.4200798571109772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5677063465118408, "epoch": 2.88, "learning_rate": 3.559171597633136e-05, "loss": 0.6632, "step": 3409, "task_loss": 0.49648523330688477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6962506771087646, "epoch": 2.88, "learning_rate": 3.558748943364328e-05, "loss": 0.6746, "step": 3410, "task_loss": 0.602637529373169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46902984380722046, "epoch": 2.88, "learning_rate": 3.55832628909552e-05, "loss": 0.6844, "step": 3411, "task_loss": 1.2651164531707764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.937312126159668, "epoch": 2.88, "learning_rate": 3.557903634826712e-05, "loss": 0.6725, "step": 3412, "task_loss": 0.8039719462394714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6857679486274719, "epoch": 2.88, "learning_rate": 3.557480980557904e-05, "loss": 0.6839, "step": 3413, "task_loss": 0.811154842376709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.514104962348938, "epoch": 2.89, "learning_rate": 3.5570583262890954e-05, "loss": 0.7477, "step": 3414, "task_loss": 1.1416230201721191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5920051336288452, "epoch": 2.89, "learning_rate": 3.5566356720202874e-05, "loss": 0.8937, "step": 3415, "task_loss": 1.3573713302612305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5068243145942688, "epoch": 2.89, "learning_rate": 3.5562130177514794e-05, "loss": 0.715, "step": 3416, "task_loss": 0.8114786148071289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6290361881256104, "epoch": 2.89, "learning_rate": 3.5557903634826713e-05, "loss": 0.8212, "step": 3417, "task_loss": 1.0252795219421387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9865380525588989, "epoch": 2.89, "learning_rate": 3.555367709213863e-05, "loss": 0.8204, "step": 3418, "task_loss": 1.1151162385940552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6453908085823059, "epoch": 2.89, "learning_rate": 3.554945054945055e-05, "loss": 0.6628, "step": 3419, "task_loss": 0.5817614197731018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1869101524353027, "epoch": 2.89, "learning_rate": 3.5545224006762466e-05, "loss": 0.8956, "step": 3420, "task_loss": 1.7280217409133911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7250593900680542, "epoch": 2.89, "learning_rate": 3.5540997464074386e-05, "loss": 0.7479, "step": 3421, "task_loss": 1.6045708656311035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9519643783569336, "epoch": 2.89, "learning_rate": 3.5536770921386305e-05, "loss": 0.8224, "step": 3422, "task_loss": 1.6881781816482544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4207119941711426, "epoch": 2.89, "learning_rate": 3.553254437869823e-05, "loss": 0.6449, "step": 3423, "task_loss": 0.5683922171592712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5113493800163269, "epoch": 2.89, "learning_rate": 3.5528317836010145e-05, "loss": 0.7558, "step": 3424, "task_loss": 0.14405347406864166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9423756003379822, "epoch": 2.89, "learning_rate": 3.5524091293322065e-05, "loss": 0.6044, "step": 3425, "task_loss": 1.99700927734375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.783976674079895, "epoch": 2.9, "learning_rate": 3.5519864750633984e-05, "loss": 0.8013, "step": 3426, "task_loss": 1.6793901920318604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.562309205532074, "epoch": 2.9, "learning_rate": 3.55156382079459e-05, "loss": 0.7635, "step": 3427, "task_loss": 0.7161197066307068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7572007179260254, "epoch": 2.9, "learning_rate": 3.5511411665257824e-05, "loss": 0.7213, "step": 3428, "task_loss": 0.5464855432510376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.651614785194397, "epoch": 2.9, "learning_rate": 3.5507185122569743e-05, "loss": 0.6467, "step": 3429, "task_loss": 0.7751861214637756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8770782947540283, "epoch": 2.9, "learning_rate": 3.5502958579881656e-05, "loss": 0.8095, "step": 3430, "task_loss": 0.8769362568855286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40966886281967163, "epoch": 2.9, "learning_rate": 3.5498732037193576e-05, "loss": 0.4654, "step": 3431, "task_loss": 0.4285908639431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9747872352600098, "epoch": 2.9, "learning_rate": 3.5494505494505496e-05, "loss": 0.7261, "step": 3432, "task_loss": 1.238671898841858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.706784725189209, "epoch": 2.9, "learning_rate": 3.5490278951817416e-05, "loss": 0.8105, "step": 3433, "task_loss": 0.6514732837677002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.041033387184143, "epoch": 2.9, "learning_rate": 3.5486052409129335e-05, "loss": 0.6823, "step": 3434, "task_loss": 1.0296673774719238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6060007214546204, "epoch": 2.9, "learning_rate": 3.5481825866441255e-05, "loss": 0.8608, "step": 3435, "task_loss": 1.479670524597168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0092724561691284, "epoch": 2.9, "learning_rate": 3.547759932375317e-05, "loss": 0.7246, "step": 3436, "task_loss": 1.5964360237121582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5766658782958984, "epoch": 2.9, "learning_rate": 3.547337278106509e-05, "loss": 0.5895, "step": 3437, "task_loss": 0.821502685546875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7535123229026794, "epoch": 2.91, "learning_rate": 3.546914623837701e-05, "loss": 0.6715, "step": 3438, "task_loss": 0.3763297498226166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1241356134414673, "epoch": 2.91, "learning_rate": 3.546491969568893e-05, "loss": 0.7766, "step": 3439, "task_loss": 1.713033676147461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6541032791137695, "epoch": 2.91, "learning_rate": 3.546069315300085e-05, "loss": 0.6446, "step": 3440, "task_loss": 0.9213873147964478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6459155678749084, "epoch": 2.91, "learning_rate": 3.545646661031277e-05, "loss": 0.8694, "step": 3441, "task_loss": 1.4849766492843628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6222901344299316, "epoch": 2.91, "learning_rate": 3.5452240067624687e-05, "loss": 0.8048, "step": 3442, "task_loss": 0.6319260597229004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1128098964691162, "epoch": 2.91, "learning_rate": 3.54480135249366e-05, "loss": 1.1003, "step": 3443, "task_loss": 0.625972330570221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.435336709022522, "epoch": 2.91, "learning_rate": 3.544378698224852e-05, "loss": 0.5286, "step": 3444, "task_loss": 0.2884848713874817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4941648542881012, "epoch": 2.91, "learning_rate": 3.5439560439560446e-05, "loss": 0.8175, "step": 3445, "task_loss": 1.3311123847961426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7279483079910278, "epoch": 2.91, "learning_rate": 3.543533389687236e-05, "loss": 0.724, "step": 3446, "task_loss": 0.3616481423377991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9487766027450562, "epoch": 2.91, "learning_rate": 3.543110735418428e-05, "loss": 0.7124, "step": 3447, "task_loss": 1.3311735391616821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9165492057800293, "epoch": 2.91, "learning_rate": 3.54268808114962e-05, "loss": 0.8024, "step": 3448, "task_loss": 1.0065410137176514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0618176460266113, "epoch": 2.91, "learning_rate": 3.542265426880811e-05, "loss": 0.8527, "step": 3449, "task_loss": 1.2488597631454468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.09998619556427, "epoch": 2.92, "learning_rate": 3.541842772612004e-05, "loss": 0.8213, "step": 3450, "task_loss": 0.708964467048645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7216796278953552, "epoch": 2.92, "learning_rate": 3.541420118343196e-05, "loss": 0.851, "step": 3451, "task_loss": 1.4598872661590576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7261360883712769, "epoch": 2.92, "learning_rate": 3.540997464074388e-05, "loss": 0.7679, "step": 3452, "task_loss": 0.5757009387016296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3547706604003906, "epoch": 2.92, "learning_rate": 3.540574809805579e-05, "loss": 0.6047, "step": 3453, "task_loss": 0.6476296782493591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8629271388053894, "epoch": 2.92, "learning_rate": 3.540152155536771e-05, "loss": 0.8117, "step": 3454, "task_loss": 0.8801674246788025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43172964453697205, "epoch": 2.92, "learning_rate": 3.539729501267963e-05, "loss": 0.5993, "step": 3455, "task_loss": 2.2454917430877686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5369665026664734, "epoch": 2.92, "learning_rate": 3.539306846999155e-05, "loss": 0.6634, "step": 3456, "task_loss": 1.1113795042037964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7071578502655029, "epoch": 2.92, "learning_rate": 3.538884192730347e-05, "loss": 0.7327, "step": 3457, "task_loss": 0.6536293029785156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.934321403503418, "epoch": 2.92, "learning_rate": 3.538461538461539e-05, "loss": 0.7208, "step": 3458, "task_loss": 0.9927128553390503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8764635920524597, "epoch": 2.92, "learning_rate": 3.53803888419273e-05, "loss": 0.6167, "step": 3459, "task_loss": 0.6740485429763794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9551503658294678, "epoch": 2.92, "learning_rate": 3.537616229923922e-05, "loss": 0.779, "step": 3460, "task_loss": 0.9154787063598633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6708633899688721, "epoch": 2.93, "learning_rate": 3.537193575655114e-05, "loss": 0.5018, "step": 3461, "task_loss": 0.23266105353832245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42945969104766846, "epoch": 2.93, "learning_rate": 3.536770921386306e-05, "loss": 0.5971, "step": 3462, "task_loss": 1.6702088117599487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5405272841453552, "epoch": 2.93, "learning_rate": 3.536348267117498e-05, "loss": 0.7712, "step": 3463, "task_loss": 1.674409031867981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0547758340835571, "epoch": 2.93, "learning_rate": 3.53592561284869e-05, "loss": 0.7497, "step": 3464, "task_loss": 0.7672004699707031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8756707906723022, "epoch": 2.93, "learning_rate": 3.5355029585798813e-05, "loss": 0.6427, "step": 3465, "task_loss": 0.7010441422462463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6829769611358643, "epoch": 2.93, "learning_rate": 3.535080304311073e-05, "loss": 0.7786, "step": 3466, "task_loss": 0.35273051261901855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3636772036552429, "epoch": 2.93, "learning_rate": 3.534657650042266e-05, "loss": 0.6732, "step": 3467, "task_loss": 0.5016867518424988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.559635579586029, "epoch": 2.93, "learning_rate": 3.534234995773458e-05, "loss": 0.5576, "step": 3468, "task_loss": 0.3644520938396454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7553682923316956, "epoch": 2.93, "learning_rate": 3.533812341504649e-05, "loss": 0.7665, "step": 3469, "task_loss": 1.2647511959075928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.55854332447052, "epoch": 2.93, "learning_rate": 3.533389687235841e-05, "loss": 0.8079, "step": 3470, "task_loss": 1.339534044265747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6029951572418213, "epoch": 2.93, "learning_rate": 3.532967032967033e-05, "loss": 0.7349, "step": 3471, "task_loss": 0.6847051978111267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4803905785083771, "epoch": 2.93, "learning_rate": 3.532544378698225e-05, "loss": 0.7698, "step": 3472, "task_loss": 0.39300790429115295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6557621359825134, "epoch": 2.94, "learning_rate": 3.532121724429417e-05, "loss": 0.8, "step": 3473, "task_loss": 0.689056932926178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38581717014312744, "epoch": 2.94, "learning_rate": 3.531699070160609e-05, "loss": 0.6589, "step": 3474, "task_loss": 1.1494646072387695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7917265892028809, "epoch": 2.94, "learning_rate": 3.5312764158918004e-05, "loss": 0.8605, "step": 3475, "task_loss": 0.4792248010635376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4674330949783325, "epoch": 2.94, "learning_rate": 3.5308537616229924e-05, "loss": 0.6509, "step": 3476, "task_loss": 0.5007020235061646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7342607378959656, "epoch": 2.94, "learning_rate": 3.5304311073541844e-05, "loss": 0.6254, "step": 3477, "task_loss": 0.6376438736915588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.569838285446167, "epoch": 2.94, "learning_rate": 3.530008453085376e-05, "loss": 0.7327, "step": 3478, "task_loss": 0.7260635495185852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5506277084350586, "epoch": 2.94, "learning_rate": 3.529585798816568e-05, "loss": 0.8038, "step": 3479, "task_loss": 1.100770354270935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0023845434188843, "epoch": 2.94, "learning_rate": 3.52916314454776e-05, "loss": 0.7105, "step": 3480, "task_loss": 1.0005943775177002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.136847734451294, "epoch": 2.94, "learning_rate": 3.528740490278952e-05, "loss": 0.9255, "step": 3481, "task_loss": 1.7830514907836914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.02586030960083, "epoch": 2.94, "learning_rate": 3.5283178360101435e-05, "loss": 0.8018, "step": 3482, "task_loss": 0.7019370794296265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47710490226745605, "epoch": 2.94, "learning_rate": 3.5278951817413355e-05, "loss": 0.5686, "step": 3483, "task_loss": 0.22217045724391937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8275421857833862, "epoch": 2.94, "learning_rate": 3.527472527472528e-05, "loss": 0.7999, "step": 3484, "task_loss": 1.4307249784469604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2308499813079834, "epoch": 2.95, "learning_rate": 3.5270498732037195e-05, "loss": 0.84, "step": 3485, "task_loss": 1.6488863229751587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5758970975875854, "epoch": 2.95, "learning_rate": 3.5266272189349114e-05, "loss": 0.7943, "step": 3486, "task_loss": 0.3984098434448242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9738062620162964, "epoch": 2.95, "learning_rate": 3.5262045646661034e-05, "loss": 0.8309, "step": 3487, "task_loss": 1.131583333015442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6573241353034973, "epoch": 2.95, "learning_rate": 3.525781910397295e-05, "loss": 0.6877, "step": 3488, "task_loss": 0.5549317002296448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0458537340164185, "epoch": 2.95, "learning_rate": 3.5253592561284874e-05, "loss": 0.9446, "step": 3489, "task_loss": 1.9252161979675293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6736923456192017, "epoch": 2.95, "learning_rate": 3.524936601859679e-05, "loss": 0.4841, "step": 3490, "task_loss": 0.46683552861213684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.806762158870697, "epoch": 2.95, "learning_rate": 3.5245139475908706e-05, "loss": 0.734, "step": 3491, "task_loss": 0.7796412706375122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4483540654182434, "epoch": 2.95, "learning_rate": 3.5240912933220626e-05, "loss": 0.6219, "step": 3492, "task_loss": 0.4687820076942444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39906173944473267, "epoch": 2.95, "learning_rate": 3.5236686390532546e-05, "loss": 0.6527, "step": 3493, "task_loss": 0.1871040165424347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7199836373329163, "epoch": 2.95, "learning_rate": 3.5232459847844466e-05, "loss": 0.9041, "step": 3494, "task_loss": 0.9986658692359924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5101284384727478, "epoch": 2.95, "learning_rate": 3.5228233305156385e-05, "loss": 0.5592, "step": 3495, "task_loss": 0.9976024031639099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7561028003692627, "epoch": 2.95, "learning_rate": 3.5224006762468305e-05, "loss": 0.7077, "step": 3496, "task_loss": 1.2883236408233643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0115514993667603, "epoch": 2.96, "learning_rate": 3.5219780219780225e-05, "loss": 0.7671, "step": 3497, "task_loss": 1.0213358402252197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6214433908462524, "epoch": 2.96, "learning_rate": 3.521555367709214e-05, "loss": 0.7222, "step": 3498, "task_loss": 1.0607128143310547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8613982796669006, "epoch": 2.96, "learning_rate": 3.521132713440406e-05, "loss": 0.8158, "step": 3499, "task_loss": 0.6398619413375854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8451910614967346, "epoch": 2.96, "learning_rate": 3.520710059171598e-05, "loss": 0.5851, "step": 3500, "task_loss": 1.3154056072235107 }, { "epoch": 2.96, "eval_accuracy": 0.8891881188118812, "eval_loss": 0.45988893508911133, "eval_runtime": 228.6581, "eval_samples_per_second": 110.427, "eval_steps_per_second": 0.866, "step": 3500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7575498223304749, "epoch": 2.96, "learning_rate": 3.52028740490279e-05, "loss": 0.6651, "step": 3501, "task_loss": 1.1811325550079346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6149915456771851, "epoch": 2.96, "learning_rate": 3.519864750633982e-05, "loss": 0.7076, "step": 3502, "task_loss": 0.3247385025024414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7450119853019714, "epoch": 2.96, "learning_rate": 3.5194420963651736e-05, "loss": 0.5744, "step": 3503, "task_loss": 0.24779245257377625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2241921424865723, "epoch": 2.96, "learning_rate": 3.519019442096365e-05, "loss": 0.8492, "step": 3504, "task_loss": 2.1540780067443848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4457588791847229, "epoch": 2.96, "learning_rate": 3.518596787827557e-05, "loss": 0.5055, "step": 3505, "task_loss": 1.8695106506347656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1786446571350098, "epoch": 2.96, "learning_rate": 3.5181741335587496e-05, "loss": 0.7534, "step": 3506, "task_loss": 1.5412825345993042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.611754298210144, "epoch": 2.96, "learning_rate": 3.517751479289941e-05, "loss": 0.8446, "step": 3507, "task_loss": 0.8767352104187012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.6720008850097656, "epoch": 2.96, "learning_rate": 3.517328825021133e-05, "loss": 0.9244, "step": 3508, "task_loss": 0.9086243510246277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.78460693359375, "epoch": 2.97, "learning_rate": 3.516906170752325e-05, "loss": 0.6703, "step": 3509, "task_loss": 1.3038012981414795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.115155577659607, "epoch": 2.97, "learning_rate": 3.516483516483517e-05, "loss": 0.8794, "step": 3510, "task_loss": 2.5163519382476807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5943164229393005, "epoch": 2.97, "learning_rate": 3.516060862214709e-05, "loss": 1.1491, "step": 3511, "task_loss": 1.702355146408081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6583766937255859, "epoch": 2.97, "learning_rate": 3.515638207945901e-05, "loss": 0.7126, "step": 3512, "task_loss": 0.9775614738464355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5614702701568604, "epoch": 2.97, "learning_rate": 3.515215553677093e-05, "loss": 0.6494, "step": 3513, "task_loss": 1.4156602621078491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5537071228027344, "epoch": 2.97, "learning_rate": 3.514792899408284e-05, "loss": 0.643, "step": 3514, "task_loss": 0.24175675213336945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48728644847869873, "epoch": 2.97, "learning_rate": 3.514370245139476e-05, "loss": 0.8004, "step": 3515, "task_loss": 0.9385992288589478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7581866383552551, "epoch": 2.97, "learning_rate": 3.513947590870668e-05, "loss": 0.6006, "step": 3516, "task_loss": 0.6312645673751831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6775551438331604, "epoch": 2.97, "learning_rate": 3.51352493660186e-05, "loss": 0.611, "step": 3517, "task_loss": 0.7012837529182434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6819434762001038, "epoch": 2.97, "learning_rate": 3.513102282333052e-05, "loss": 0.6328, "step": 3518, "task_loss": 0.567051112651825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8982431888580322, "epoch": 2.97, "learning_rate": 3.512679628064244e-05, "loss": 0.8216, "step": 3519, "task_loss": 1.0541644096374512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44036123156547546, "epoch": 2.97, "learning_rate": 3.512256973795435e-05, "loss": 0.531, "step": 3520, "task_loss": 0.36266809701919556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0773639678955078, "epoch": 2.98, "learning_rate": 3.511834319526627e-05, "loss": 0.6669, "step": 3521, "task_loss": 1.2040399312973022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6456725001335144, "epoch": 2.98, "learning_rate": 3.511411665257819e-05, "loss": 0.79, "step": 3522, "task_loss": 0.3172779083251953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5409705638885498, "epoch": 2.98, "learning_rate": 3.510989010989011e-05, "loss": 0.7878, "step": 3523, "task_loss": 0.7052796483039856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9127213954925537, "epoch": 2.98, "learning_rate": 3.510566356720203e-05, "loss": 0.7125, "step": 3524, "task_loss": 0.7552744150161743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1011497974395752, "epoch": 2.98, "learning_rate": 3.510143702451395e-05, "loss": 0.7777, "step": 3525, "task_loss": 0.5825923085212708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6759148240089417, "epoch": 2.98, "learning_rate": 3.509721048182587e-05, "loss": 0.8311, "step": 3526, "task_loss": 0.709834098815918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7002780437469482, "epoch": 2.98, "learning_rate": 3.509298393913778e-05, "loss": 0.7319, "step": 3527, "task_loss": 1.1596548557281494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46024376153945923, "epoch": 2.98, "learning_rate": 3.508875739644971e-05, "loss": 0.7047, "step": 3528, "task_loss": 0.6764599680900574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.8885741233825684, "epoch": 2.98, "learning_rate": 3.508453085376163e-05, "loss": 0.8634, "step": 3529, "task_loss": 1.3570809364318848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7255014181137085, "epoch": 2.98, "learning_rate": 3.508030431107354e-05, "loss": 0.7611, "step": 3530, "task_loss": 2.0896525382995605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5057455897331238, "epoch": 2.98, "learning_rate": 3.507607776838546e-05, "loss": 0.8551, "step": 3531, "task_loss": 0.3309262692928314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45254671573638916, "epoch": 2.99, "learning_rate": 3.507185122569738e-05, "loss": 0.7097, "step": 3532, "task_loss": 0.39333784580230713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8713363409042358, "epoch": 2.99, "learning_rate": 3.5067624683009295e-05, "loss": 0.8156, "step": 3533, "task_loss": 1.6795499324798584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.60808265209198, "epoch": 2.99, "learning_rate": 3.506339814032122e-05, "loss": 0.5878, "step": 3534, "task_loss": 0.8184134364128113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8242766857147217, "epoch": 2.99, "learning_rate": 3.505917159763314e-05, "loss": 0.7817, "step": 3535, "task_loss": 0.9278186559677124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5657252073287964, "epoch": 2.99, "learning_rate": 3.5054945054945054e-05, "loss": 0.7697, "step": 3536, "task_loss": 0.5133640766143799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6599215865135193, "epoch": 2.99, "learning_rate": 3.5050718512256974e-05, "loss": 0.5593, "step": 3537, "task_loss": 1.3066964149475098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37537139654159546, "epoch": 2.99, "learning_rate": 3.504649196956889e-05, "loss": 0.6747, "step": 3538, "task_loss": 0.8508651852607727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6105172038078308, "epoch": 2.99, "learning_rate": 3.504226542688081e-05, "loss": 0.676, "step": 3539, "task_loss": 1.255632996559143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47263914346694946, "epoch": 2.99, "learning_rate": 3.503803888419273e-05, "loss": 0.7332, "step": 3540, "task_loss": 1.306686520576477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6724578142166138, "epoch": 2.99, "learning_rate": 3.503381234150465e-05, "loss": 0.5942, "step": 3541, "task_loss": 0.9191336035728455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6361077427864075, "epoch": 2.99, "learning_rate": 3.502958579881657e-05, "loss": 0.6647, "step": 3542, "task_loss": 0.6519774198532104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6790368556976318, "epoch": 2.99, "learning_rate": 3.5025359256128485e-05, "loss": 0.6108, "step": 3543, "task_loss": 1.3175681829452515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39566344022750854, "epoch": 3.0, "learning_rate": 3.5021132713440405e-05, "loss": 0.8359, "step": 3544, "task_loss": 0.7916802763938904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4797672629356384, "epoch": 3.0, "learning_rate": 3.501690617075233e-05, "loss": 0.6771, "step": 3545, "task_loss": 0.8603438138961792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1398463249206543, "epoch": 3.0, "learning_rate": 3.5012679628064244e-05, "loss": 0.8374, "step": 3546, "task_loss": 0.814414381980896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6054514050483704, "epoch": 3.0, "learning_rate": 3.5008453085376164e-05, "loss": 0.6187, "step": 3547, "task_loss": 0.8762628436088562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5500036478042603, "epoch": 3.0, "learning_rate": 3.5004226542688084e-05, "loss": 0.4909, "step": 3548, "task_loss": 0.31093916296958923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8774495124816895, "epoch": 3.0, "learning_rate": 3.5e-05, "loss": 0.644, "step": 3549, "task_loss": 0.39253199100494385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43953657150268555, "epoch": 3.0, "learning_rate": 3.499577345731192e-05, "loss": 1.3566, "step": 3550, "task_loss": 0.8424968123435974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4337252974510193, "epoch": 3.0, "learning_rate": 3.499154691462384e-05, "loss": 0.4779, "step": 3551, "task_loss": 0.44218409061431885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8042478561401367, "epoch": 3.0, "learning_rate": 3.4987320371935756e-05, "loss": 0.6824, "step": 3552, "task_loss": 0.3617814779281616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6113454103469849, "epoch": 3.0, "learning_rate": 3.4983093829247676e-05, "loss": 0.511, "step": 3553, "task_loss": 1.1056156158447266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8372128009796143, "epoch": 3.0, "learning_rate": 3.4978867286559596e-05, "loss": 0.6523, "step": 3554, "task_loss": 0.5932086110115051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29487931728363037, "epoch": 3.01, "learning_rate": 3.4974640743871515e-05, "loss": 0.6849, "step": 3555, "task_loss": 0.6693393588066101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.965597927570343, "epoch": 3.01, "learning_rate": 3.4970414201183435e-05, "loss": 0.6847, "step": 3556, "task_loss": 0.8072828650474548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3738608956336975, "epoch": 3.01, "learning_rate": 3.4966187658495355e-05, "loss": 0.5951, "step": 3557, "task_loss": 0.30251047015190125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6709148287773132, "epoch": 3.01, "learning_rate": 3.4961961115807275e-05, "loss": 0.701, "step": 3558, "task_loss": 0.7460740208625793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5601727962493896, "epoch": 3.01, "learning_rate": 3.495773457311919e-05, "loss": 0.741, "step": 3559, "task_loss": 0.7785150408744812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8178591132164001, "epoch": 3.01, "learning_rate": 3.495350803043111e-05, "loss": 0.6913, "step": 3560, "task_loss": 1.414636492729187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8537206053733826, "epoch": 3.01, "learning_rate": 3.494928148774303e-05, "loss": 0.7244, "step": 3561, "task_loss": 1.7884440422058105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6941735744476318, "epoch": 3.01, "learning_rate": 3.494505494505495e-05, "loss": 0.7028, "step": 3562, "task_loss": 0.658213198184967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5836626291275024, "epoch": 3.01, "learning_rate": 3.4940828402366866e-05, "loss": 0.8885, "step": 3563, "task_loss": 0.5522605180740356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.891832709312439, "epoch": 3.01, "learning_rate": 3.4936601859678786e-05, "loss": 0.7207, "step": 3564, "task_loss": 0.9203711748123169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3659219741821289, "epoch": 3.01, "learning_rate": 3.49323753169907e-05, "loss": 0.6811, "step": 3565, "task_loss": 0.881561279296875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0173832178115845, "epoch": 3.01, "learning_rate": 3.492814877430262e-05, "loss": 0.5866, "step": 3566, "task_loss": 0.4808333218097687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1727278232574463, "epoch": 3.02, "learning_rate": 3.492392223161454e-05, "loss": 0.9874, "step": 3567, "task_loss": 1.234318733215332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6782771348953247, "epoch": 3.02, "learning_rate": 3.4919695688926465e-05, "loss": 0.6266, "step": 3568, "task_loss": 1.586388349533081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7511957883834839, "epoch": 3.02, "learning_rate": 3.491546914623838e-05, "loss": 0.7233, "step": 3569, "task_loss": 0.6499571800231934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1829522848129272, "epoch": 3.02, "learning_rate": 3.49112426035503e-05, "loss": 0.8081, "step": 3570, "task_loss": 1.551069736480713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4098557233810425, "epoch": 3.02, "learning_rate": 3.490701606086222e-05, "loss": 0.7652, "step": 3571, "task_loss": 0.5927712321281433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9034950733184814, "epoch": 3.02, "learning_rate": 3.490278951817413e-05, "loss": 1.049, "step": 3572, "task_loss": 0.8563557267189026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5900263786315918, "epoch": 3.02, "learning_rate": 3.489856297548606e-05, "loss": 0.6691, "step": 3573, "task_loss": 1.4217010736465454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41364336013793945, "epoch": 3.02, "learning_rate": 3.489433643279798e-05, "loss": 0.5466, "step": 3574, "task_loss": 0.5450477004051208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6399833559989929, "epoch": 3.02, "learning_rate": 3.489010989010989e-05, "loss": 0.7892, "step": 3575, "task_loss": 0.9992542862892151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7601376175880432, "epoch": 3.02, "learning_rate": 3.488588334742181e-05, "loss": 0.5612, "step": 3576, "task_loss": 0.8570849895477295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5140930414199829, "epoch": 3.02, "learning_rate": 3.488165680473373e-05, "loss": 0.6433, "step": 3577, "task_loss": 0.5177265405654907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42960435152053833, "epoch": 3.02, "learning_rate": 3.487743026204565e-05, "loss": 0.7144, "step": 3578, "task_loss": 0.3304111361503601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5891939401626587, "epoch": 3.03, "learning_rate": 3.487320371935757e-05, "loss": 0.5223, "step": 3579, "task_loss": 0.6053823232650757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4822579622268677, "epoch": 3.03, "learning_rate": 3.486897717666949e-05, "loss": 0.5621, "step": 3580, "task_loss": 0.4804011285305023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6441078782081604, "epoch": 3.03, "learning_rate": 3.48647506339814e-05, "loss": 0.743, "step": 3581, "task_loss": 1.1420804262161255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5860291123390198, "epoch": 3.03, "learning_rate": 3.486052409129332e-05, "loss": 0.6635, "step": 3582, "task_loss": 0.6237084269523621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.582332968711853, "epoch": 3.03, "learning_rate": 3.485629754860524e-05, "loss": 0.7599, "step": 3583, "task_loss": 1.3454017639160156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8366926908493042, "epoch": 3.03, "learning_rate": 3.485207100591716e-05, "loss": 0.5525, "step": 3584, "task_loss": 0.9461635947227478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44944024085998535, "epoch": 3.03, "learning_rate": 3.484784446322908e-05, "loss": 0.7438, "step": 3585, "task_loss": 0.6866551637649536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7555097341537476, "epoch": 3.03, "learning_rate": 3.4843617920541e-05, "loss": 0.5589, "step": 3586, "task_loss": 0.9157886505126953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4893421530723572, "epoch": 3.03, "learning_rate": 3.483939137785292e-05, "loss": 0.7171, "step": 3587, "task_loss": 1.3617891073226929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4786592423915863, "epoch": 3.03, "learning_rate": 3.483516483516483e-05, "loss": 0.6531, "step": 3588, "task_loss": 0.7157772779464722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44599664211273193, "epoch": 3.03, "learning_rate": 3.483093829247675e-05, "loss": 0.4893, "step": 3589, "task_loss": 0.4589158296585083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0318453311920166, "epoch": 3.03, "learning_rate": 3.482671174978868e-05, "loss": 0.7698, "step": 3590, "task_loss": 0.9067234396934509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0646498203277588, "epoch": 3.04, "learning_rate": 3.482248520710059e-05, "loss": 0.8219, "step": 3591, "task_loss": 1.3337565660476685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8827232122421265, "epoch": 3.04, "learning_rate": 3.481825866441251e-05, "loss": 0.6953, "step": 3592, "task_loss": 1.154910683631897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3647899627685547, "epoch": 3.04, "learning_rate": 3.481403212172443e-05, "loss": 0.513, "step": 3593, "task_loss": 0.4109800159931183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5805737972259521, "epoch": 3.04, "learning_rate": 3.4809805579036345e-05, "loss": 0.8607, "step": 3594, "task_loss": 1.0610694885253906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5662297606468201, "epoch": 3.04, "learning_rate": 3.480557903634827e-05, "loss": 0.5418, "step": 3595, "task_loss": 0.47057056427001953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9658246040344238, "epoch": 3.04, "learning_rate": 3.480135249366019e-05, "loss": 0.669, "step": 3596, "task_loss": 0.48805683851242065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.108781099319458, "epoch": 3.04, "learning_rate": 3.479712595097211e-05, "loss": 0.7186, "step": 3597, "task_loss": 1.9384948015213013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5495907068252563, "epoch": 3.04, "learning_rate": 3.4792899408284023e-05, "loss": 0.6172, "step": 3598, "task_loss": 1.0088746547698975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4112240672111511, "epoch": 3.04, "learning_rate": 3.478867286559594e-05, "loss": 0.5191, "step": 3599, "task_loss": 0.6627725958824158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.058617115020752, "epoch": 3.04, "learning_rate": 3.478444632290786e-05, "loss": 0.7691, "step": 3600, "task_loss": 0.6134690642356873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8195458650588989, "epoch": 3.04, "learning_rate": 3.478021978021978e-05, "loss": 0.6174, "step": 3601, "task_loss": 2.2314844131469727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9551973342895508, "epoch": 3.04, "learning_rate": 3.47759932375317e-05, "loss": 0.9318, "step": 3602, "task_loss": 2.072413921356201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6425678730010986, "epoch": 3.05, "learning_rate": 3.477176669484362e-05, "loss": 0.5524, "step": 3603, "task_loss": 0.7999861836433411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.940825879573822, "epoch": 3.05, "learning_rate": 3.4767540152155535e-05, "loss": 0.6936, "step": 3604, "task_loss": 0.6632443070411682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7223283648490906, "epoch": 3.05, "learning_rate": 3.4763313609467455e-05, "loss": 0.552, "step": 3605, "task_loss": 0.20343735814094543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46318188309669495, "epoch": 3.05, "learning_rate": 3.4759087066779375e-05, "loss": 0.5071, "step": 3606, "task_loss": 0.7007144093513489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9650071263313293, "epoch": 3.05, "learning_rate": 3.4754860524091294e-05, "loss": 0.5177, "step": 3607, "task_loss": 0.4019029140472412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3849183320999146, "epoch": 3.05, "learning_rate": 3.4750633981403214e-05, "loss": 0.9365, "step": 3608, "task_loss": 0.7168657779693604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6050105690956116, "epoch": 3.05, "learning_rate": 3.4746407438715134e-05, "loss": 0.642, "step": 3609, "task_loss": 0.5087983012199402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43330299854278564, "epoch": 3.05, "learning_rate": 3.474218089602705e-05, "loss": 0.5827, "step": 3610, "task_loss": 0.5415623784065247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5316822528839111, "epoch": 3.05, "learning_rate": 3.4737954353338967e-05, "loss": 0.5948, "step": 3611, "task_loss": 0.6772130131721497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4631333649158478, "epoch": 3.05, "learning_rate": 3.473372781065089e-05, "loss": 0.5518, "step": 3612, "task_loss": 0.5316349267959595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48041319847106934, "epoch": 3.05, "learning_rate": 3.472950126796281e-05, "loss": 0.716, "step": 3613, "task_loss": 0.3687160313129425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9606984853744507, "epoch": 3.05, "learning_rate": 3.4725274725274726e-05, "loss": 0.7077, "step": 3614, "task_loss": 0.9231090545654297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38540130853652954, "epoch": 3.06, "learning_rate": 3.4721048182586645e-05, "loss": 0.7155, "step": 3615, "task_loss": 0.4642612040042877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5279570817947388, "epoch": 3.06, "learning_rate": 3.4716821639898565e-05, "loss": 0.7943, "step": 3616, "task_loss": 0.2436733841896057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5405007004737854, "epoch": 3.06, "learning_rate": 3.4712595097210485e-05, "loss": 0.695, "step": 3617, "task_loss": 0.8684276342391968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7234306931495667, "epoch": 3.06, "learning_rate": 3.4708368554522405e-05, "loss": 0.6789, "step": 3618, "task_loss": 0.9345763325691223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5793839693069458, "epoch": 3.06, "learning_rate": 3.4704142011834324e-05, "loss": 0.655, "step": 3619, "task_loss": 0.1378188282251358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48766395449638367, "epoch": 3.06, "learning_rate": 3.469991546914624e-05, "loss": 0.7424, "step": 3620, "task_loss": 0.16840820014476776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9185991287231445, "epoch": 3.06, "learning_rate": 3.469568892645816e-05, "loss": 0.885, "step": 3621, "task_loss": 0.7222850322723389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6478458642959595, "epoch": 3.06, "learning_rate": 3.469146238377008e-05, "loss": 0.6751, "step": 3622, "task_loss": 0.9369020462036133 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43672165274620056, "epoch": 3.06, "learning_rate": 3.4687235841081997e-05, "loss": 0.5366, "step": 3623, "task_loss": 0.6579287648200989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42594438791275024, "epoch": 3.06, "learning_rate": 3.4683009298393916e-05, "loss": 0.7697, "step": 3624, "task_loss": 0.7111186981201172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6135975122451782, "epoch": 3.06, "learning_rate": 3.4678782755705836e-05, "loss": 0.5485, "step": 3625, "task_loss": 0.9757449626922607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9196648001670837, "epoch": 3.07, "learning_rate": 3.467455621301775e-05, "loss": 0.6215, "step": 3626, "task_loss": 1.1645159721374512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5087860822677612, "epoch": 3.07, "learning_rate": 3.467032967032967e-05, "loss": 0.5705, "step": 3627, "task_loss": 0.4755611717700958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1587384939193726, "epoch": 3.07, "learning_rate": 3.466610312764159e-05, "loss": 0.8849, "step": 3628, "task_loss": 0.7653050422668457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4880496859550476, "epoch": 3.07, "learning_rate": 3.4661876584953515e-05, "loss": 0.5321, "step": 3629, "task_loss": 0.6090231537818909 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1166832447052002, "epoch": 3.07, "learning_rate": 3.465765004226543e-05, "loss": 0.6751, "step": 3630, "task_loss": 0.778313159942627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8939477801322937, "epoch": 3.07, "learning_rate": 3.465342349957735e-05, "loss": 0.6275, "step": 3631, "task_loss": 1.2108412981033325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8347808718681335, "epoch": 3.07, "learning_rate": 3.464919695688927e-05, "loss": 0.7194, "step": 3632, "task_loss": 0.6223213076591492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7574350833892822, "epoch": 3.07, "learning_rate": 3.464497041420118e-05, "loss": 0.8716, "step": 3633, "task_loss": 1.2905359268188477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6879938840866089, "epoch": 3.07, "learning_rate": 3.464074387151311e-05, "loss": 0.6335, "step": 3634, "task_loss": 0.3570175766944885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3733072578907013, "epoch": 3.07, "learning_rate": 3.463651732882503e-05, "loss": 0.721, "step": 3635, "task_loss": 0.4897845983505249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6140908598899841, "epoch": 3.07, "learning_rate": 3.463229078613694e-05, "loss": 0.61, "step": 3636, "task_loss": 0.3571910262107849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5097900629043579, "epoch": 3.07, "learning_rate": 3.462806424344886e-05, "loss": 0.7031, "step": 3637, "task_loss": 1.0409501791000366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5055532455444336, "epoch": 3.08, "learning_rate": 3.462383770076078e-05, "loss": 0.6336, "step": 3638, "task_loss": 0.7054551243782043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6209069490432739, "epoch": 3.08, "learning_rate": 3.46196111580727e-05, "loss": 0.5907, "step": 3639, "task_loss": 0.24482828378677368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7833565473556519, "epoch": 3.08, "learning_rate": 3.461538461538462e-05, "loss": 0.8233, "step": 3640, "task_loss": 1.542444109916687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5171521902084351, "epoch": 3.08, "learning_rate": 3.461115807269654e-05, "loss": 0.6699, "step": 3641, "task_loss": 0.10688822716474533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7827117443084717, "epoch": 3.08, "learning_rate": 3.460693153000846e-05, "loss": 0.6743, "step": 3642, "task_loss": 1.0264723300933838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3677600026130676, "epoch": 3.08, "learning_rate": 3.460270498732037e-05, "loss": 0.6553, "step": 3643, "task_loss": 0.49530482292175293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4029976725578308, "epoch": 3.08, "learning_rate": 3.459847844463229e-05, "loss": 0.5039, "step": 3644, "task_loss": 0.7688202261924744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5486735105514526, "epoch": 3.08, "learning_rate": 3.459425190194421e-05, "loss": 0.4963, "step": 3645, "task_loss": 0.11027728021144867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3741183280944824, "epoch": 3.08, "learning_rate": 3.459002535925613e-05, "loss": 0.5792, "step": 3646, "task_loss": 0.5936026573181152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6782668828964233, "epoch": 3.08, "learning_rate": 3.458579881656805e-05, "loss": 0.6633, "step": 3647, "task_loss": 0.6288702487945557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4231131076812744, "epoch": 3.08, "learning_rate": 3.458157227387997e-05, "loss": 0.8048, "step": 3648, "task_loss": 0.930057168006897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6348741054534912, "epoch": 3.08, "learning_rate": 3.457734573119188e-05, "loss": 0.6539, "step": 3649, "task_loss": 0.40459612011909485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5477964878082275, "epoch": 3.09, "learning_rate": 3.45731191885038e-05, "loss": 0.643, "step": 3650, "task_loss": 1.3185477256774902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46042338013648987, "epoch": 3.09, "learning_rate": 3.456889264581573e-05, "loss": 0.7995, "step": 3651, "task_loss": 0.8438501358032227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8068141341209412, "epoch": 3.09, "learning_rate": 3.456466610312764e-05, "loss": 0.7769, "step": 3652, "task_loss": 0.9261831045150757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.705544114112854, "epoch": 3.09, "learning_rate": 3.456043956043956e-05, "loss": 0.643, "step": 3653, "task_loss": 0.6667105555534363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.283825397491455, "epoch": 3.09, "learning_rate": 3.455621301775148e-05, "loss": 0.8295, "step": 3654, "task_loss": 1.1561167240142822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5110570192337036, "epoch": 3.09, "learning_rate": 3.4551986475063394e-05, "loss": 0.6884, "step": 3655, "task_loss": 1.1525349617004395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9352744817733765, "epoch": 3.09, "learning_rate": 3.454775993237532e-05, "loss": 0.6429, "step": 3656, "task_loss": 1.1322050094604492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8701868057250977, "epoch": 3.09, "learning_rate": 3.454353338968724e-05, "loss": 0.9323, "step": 3657, "task_loss": 0.6373240351676941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1272753477096558, "epoch": 3.09, "learning_rate": 3.453930684699916e-05, "loss": 0.9596, "step": 3658, "task_loss": 1.3707433938980103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7567120790481567, "epoch": 3.09, "learning_rate": 3.453508030431107e-05, "loss": 0.5409, "step": 3659, "task_loss": 0.6953011751174927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.862185001373291, "epoch": 3.09, "learning_rate": 3.453085376162299e-05, "loss": 0.5595, "step": 3660, "task_loss": 0.5445569157600403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7042524814605713, "epoch": 3.09, "learning_rate": 3.452662721893491e-05, "loss": 0.6456, "step": 3661, "task_loss": 1.1954067945480347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4417010545730591, "epoch": 3.1, "learning_rate": 3.452240067624683e-05, "loss": 0.6821, "step": 3662, "task_loss": 0.9332767724990845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31428661942481995, "epoch": 3.1, "learning_rate": 3.451817413355875e-05, "loss": 0.5747, "step": 3663, "task_loss": 1.1859930753707886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7423601746559143, "epoch": 3.1, "learning_rate": 3.451394759087067e-05, "loss": 0.8787, "step": 3664, "task_loss": 1.6809523105621338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8902570605278015, "epoch": 3.1, "learning_rate": 3.4509721048182585e-05, "loss": 0.7193, "step": 3665, "task_loss": 0.8307445645332336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4651380181312561, "epoch": 3.1, "learning_rate": 3.4505494505494505e-05, "loss": 0.5203, "step": 3666, "task_loss": 0.2219117283821106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6209709644317627, "epoch": 3.1, "learning_rate": 3.4501267962806424e-05, "loss": 0.6962, "step": 3667, "task_loss": 0.3096996545791626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4959348142147064, "epoch": 3.1, "learning_rate": 3.4497041420118344e-05, "loss": 0.6595, "step": 3668, "task_loss": 0.23782256245613098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6249418258666992, "epoch": 3.1, "learning_rate": 3.4492814877430264e-05, "loss": 0.5658, "step": 3669, "task_loss": 0.6621933579444885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2890488803386688, "epoch": 3.1, "learning_rate": 3.4488588334742184e-05, "loss": 0.5579, "step": 3670, "task_loss": 0.8235982656478882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41222092509269714, "epoch": 3.1, "learning_rate": 3.4484361792054103e-05, "loss": 0.4933, "step": 3671, "task_loss": 0.6372846961021423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47301530838012695, "epoch": 3.1, "learning_rate": 3.4480135249366016e-05, "loss": 0.4614, "step": 3672, "task_loss": 0.31961125135421753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2152414321899414, "epoch": 3.1, "learning_rate": 3.447590870667794e-05, "loss": 0.8295, "step": 3673, "task_loss": 0.8630499243736267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3054693937301636, "epoch": 3.11, "learning_rate": 3.447168216398986e-05, "loss": 0.5383, "step": 3674, "task_loss": 0.2641279697418213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8957634568214417, "epoch": 3.11, "learning_rate": 3.4467455621301776e-05, "loss": 0.7628, "step": 3675, "task_loss": 0.5650660991668701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3680400848388672, "epoch": 3.11, "learning_rate": 3.4463229078613695e-05, "loss": 0.645, "step": 3676, "task_loss": 0.500686764717102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.710966944694519, "epoch": 3.11, "learning_rate": 3.4459002535925615e-05, "loss": 0.6517, "step": 3677, "task_loss": 0.3076692223548889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8600115180015564, "epoch": 3.11, "learning_rate": 3.445477599323753e-05, "loss": 0.6647, "step": 3678, "task_loss": 1.2308040857315063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7716382741928101, "epoch": 3.11, "learning_rate": 3.4450549450549455e-05, "loss": 0.8439, "step": 3679, "task_loss": 1.227615475654602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7783896327018738, "epoch": 3.11, "learning_rate": 3.4446322907861374e-05, "loss": 0.5951, "step": 3680, "task_loss": 1.0431450605392456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34108084440231323, "epoch": 3.11, "learning_rate": 3.444209636517329e-05, "loss": 0.4639, "step": 3681, "task_loss": 0.2700452506542206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4224686026573181, "epoch": 3.11, "learning_rate": 3.443786982248521e-05, "loss": 0.5596, "step": 3682, "task_loss": 0.5008390545845032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5877885222434998, "epoch": 3.11, "learning_rate": 3.443364327979713e-05, "loss": 0.857, "step": 3683, "task_loss": 1.0885577201843262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37955549359321594, "epoch": 3.11, "learning_rate": 3.4429416737109046e-05, "loss": 0.4394, "step": 3684, "task_loss": 0.3138817250728607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40056484937667847, "epoch": 3.11, "learning_rate": 3.4425190194420966e-05, "loss": 0.5732, "step": 3685, "task_loss": 0.2762420177459717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46877509355545044, "epoch": 3.12, "learning_rate": 3.4420963651732886e-05, "loss": 0.5995, "step": 3686, "task_loss": 0.6931828856468201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7388428449630737, "epoch": 3.12, "learning_rate": 3.4416737109044806e-05, "loss": 0.6687, "step": 3687, "task_loss": 0.497245728969574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5269056558609009, "epoch": 3.12, "learning_rate": 3.441251056635672e-05, "loss": 0.5859, "step": 3688, "task_loss": 0.15157519280910492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5469696521759033, "epoch": 3.12, "learning_rate": 3.440828402366864e-05, "loss": 0.6409, "step": 3689, "task_loss": 0.6367945075035095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5809814929962158, "epoch": 3.12, "learning_rate": 3.4404057480980565e-05, "loss": 0.7934, "step": 3690, "task_loss": 1.1469680070877075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7316672801971436, "epoch": 3.12, "learning_rate": 3.439983093829248e-05, "loss": 0.5431, "step": 3691, "task_loss": 0.5061452984809875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6621488928794861, "epoch": 3.12, "learning_rate": 3.43956043956044e-05, "loss": 0.4336, "step": 3692, "task_loss": 0.5856128931045532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1469172239303589, "epoch": 3.12, "learning_rate": 3.439137785291632e-05, "loss": 0.7151, "step": 3693, "task_loss": 1.250531792640686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2513699233531952, "epoch": 3.12, "learning_rate": 3.438715131022823e-05, "loss": 0.5464, "step": 3694, "task_loss": 0.03669927641749382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6313196420669556, "epoch": 3.12, "learning_rate": 3.438292476754015e-05, "loss": 0.6615, "step": 3695, "task_loss": 1.24752676486969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9174197912216187, "epoch": 3.12, "learning_rate": 3.4378698224852077e-05, "loss": 0.785, "step": 3696, "task_loss": 2.034080743789673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9295786023139954, "epoch": 3.13, "learning_rate": 3.437447168216399e-05, "loss": 0.8877, "step": 3697, "task_loss": 2.1541543006896973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6636124849319458, "epoch": 3.13, "learning_rate": 3.437024513947591e-05, "loss": 0.6729, "step": 3698, "task_loss": 0.8873884677886963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46804749965667725, "epoch": 3.13, "learning_rate": 3.436601859678783e-05, "loss": 0.6133, "step": 3699, "task_loss": 0.32078617811203003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8805177807807922, "epoch": 3.13, "learning_rate": 3.436179205409975e-05, "loss": 0.7605, "step": 3700, "task_loss": 0.886267364025116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.103912115097046, "epoch": 3.13, "learning_rate": 3.435756551141167e-05, "loss": 0.8799, "step": 3701, "task_loss": 0.9798417687416077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0157362222671509, "epoch": 3.13, "learning_rate": 3.435333896872359e-05, "loss": 0.5984, "step": 3702, "task_loss": 0.5364814400672913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29741010069847107, "epoch": 3.13, "learning_rate": 3.434911242603551e-05, "loss": 0.7944, "step": 3703, "task_loss": 0.3277398645877838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7587305307388306, "epoch": 3.13, "learning_rate": 3.434488588334742e-05, "loss": 0.7329, "step": 3704, "task_loss": 0.7631685733795166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7421884536743164, "epoch": 3.13, "learning_rate": 3.434065934065934e-05, "loss": 0.8099, "step": 3705, "task_loss": 1.821908950805664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0236493349075317, "epoch": 3.13, "learning_rate": 3.433643279797126e-05, "loss": 0.7777, "step": 3706, "task_loss": 0.8544322848320007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6677559018135071, "epoch": 3.13, "learning_rate": 3.433220625528318e-05, "loss": 0.5537, "step": 3707, "task_loss": 0.9479256272315979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8967137932777405, "epoch": 3.13, "learning_rate": 3.43279797125951e-05, "loss": 0.7185, "step": 3708, "task_loss": 0.4673652648925781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0994935035705566, "epoch": 3.14, "learning_rate": 3.432375316990702e-05, "loss": 0.7953, "step": 3709, "task_loss": 1.1799334287643433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7766702175140381, "epoch": 3.14, "learning_rate": 3.431952662721893e-05, "loss": 0.6018, "step": 3710, "task_loss": 1.365644097328186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7631234526634216, "epoch": 3.14, "learning_rate": 3.431530008453085e-05, "loss": 0.6444, "step": 3711, "task_loss": 2.0571107864379883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6685872673988342, "epoch": 3.14, "learning_rate": 3.431107354184277e-05, "loss": 0.5958, "step": 3712, "task_loss": 0.348898321390152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5137240290641785, "epoch": 3.14, "learning_rate": 3.430684699915469e-05, "loss": 0.7048, "step": 3713, "task_loss": 0.8146230578422546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5841662287712097, "epoch": 3.14, "learning_rate": 3.430262045646661e-05, "loss": 0.581, "step": 3714, "task_loss": 0.3389625549316406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.501213788986206, "epoch": 3.14, "learning_rate": 3.429839391377853e-05, "loss": 0.6678, "step": 3715, "task_loss": 0.2915074825286865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.702324390411377, "epoch": 3.14, "learning_rate": 3.429416737109045e-05, "loss": 0.5891, "step": 3716, "task_loss": 0.2192486971616745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8353461027145386, "epoch": 3.14, "learning_rate": 3.4289940828402364e-05, "loss": 0.8046, "step": 3717, "task_loss": 1.577061653137207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9514777064323425, "epoch": 3.14, "learning_rate": 3.428571428571429e-05, "loss": 0.787, "step": 3718, "task_loss": 1.2960538864135742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47294703125953674, "epoch": 3.14, "learning_rate": 3.428148774302621e-05, "loss": 0.65, "step": 3719, "task_loss": 0.6880050301551819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41818463802337646, "epoch": 3.14, "learning_rate": 3.427726120033812e-05, "loss": 0.5719, "step": 3720, "task_loss": 0.6273998022079468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.656136155128479, "epoch": 3.15, "learning_rate": 3.427303465765004e-05, "loss": 0.4719, "step": 3721, "task_loss": 1.1080176830291748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4582267999649048, "epoch": 3.15, "learning_rate": 3.426880811496196e-05, "loss": 0.7449, "step": 3722, "task_loss": 1.2070543766021729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6509822607040405, "epoch": 3.15, "learning_rate": 3.426458157227388e-05, "loss": 0.5769, "step": 3723, "task_loss": 1.1445403099060059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8488290309906006, "epoch": 3.15, "learning_rate": 3.42603550295858e-05, "loss": 0.7427, "step": 3724, "task_loss": 0.20830874145030975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8277235627174377, "epoch": 3.15, "learning_rate": 3.425612848689772e-05, "loss": 0.8952, "step": 3725, "task_loss": 0.7268683314323425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9796358942985535, "epoch": 3.15, "learning_rate": 3.4251901944209635e-05, "loss": 0.7004, "step": 3726, "task_loss": 0.7696370482444763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6433742046356201, "epoch": 3.15, "learning_rate": 3.4247675401521555e-05, "loss": 0.6158, "step": 3727, "task_loss": 0.4722282588481903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5639978647232056, "epoch": 3.15, "learning_rate": 3.4243448858833474e-05, "loss": 0.6768, "step": 3728, "task_loss": 0.49071016907691956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5041612386703491, "epoch": 3.15, "learning_rate": 3.4239222316145394e-05, "loss": 0.5287, "step": 3729, "task_loss": 0.4251966178417206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4461835026741028, "epoch": 3.15, "learning_rate": 3.4234995773457314e-05, "loss": 0.5905, "step": 3730, "task_loss": 0.896463930606842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0472036600112915, "epoch": 3.15, "learning_rate": 3.4230769230769234e-05, "loss": 0.6551, "step": 3731, "task_loss": 0.7908580303192139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5743104219436646, "epoch": 3.15, "learning_rate": 3.422654268808115e-05, "loss": 0.6424, "step": 3732, "task_loss": 0.5943659543991089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5084221363067627, "epoch": 3.16, "learning_rate": 3.4222316145393066e-05, "loss": 0.7514, "step": 3733, "task_loss": 1.2541954517364502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5626847147941589, "epoch": 3.16, "learning_rate": 3.4218089602704986e-05, "loss": 0.6204, "step": 3734, "task_loss": 0.8054807782173157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7527306079864502, "epoch": 3.16, "learning_rate": 3.421386306001691e-05, "loss": 0.7216, "step": 3735, "task_loss": 1.3229390382766724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36895665526390076, "epoch": 3.16, "learning_rate": 3.4209636517328825e-05, "loss": 0.5362, "step": 3736, "task_loss": 1.2897366285324097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6206696033477783, "epoch": 3.16, "learning_rate": 3.4205409974640745e-05, "loss": 0.6655, "step": 3737, "task_loss": 0.749636173248291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6683449149131775, "epoch": 3.16, "learning_rate": 3.4201183431952665e-05, "loss": 0.5945, "step": 3738, "task_loss": 1.2045040130615234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3047749996185303, "epoch": 3.16, "learning_rate": 3.419695688926458e-05, "loss": 0.5077, "step": 3739, "task_loss": 0.3758259117603302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7239495515823364, "epoch": 3.16, "learning_rate": 3.4192730346576504e-05, "loss": 0.572, "step": 3740, "task_loss": 1.604012370109558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8683885931968689, "epoch": 3.16, "learning_rate": 3.4188503803888424e-05, "loss": 0.7127, "step": 3741, "task_loss": 0.8793639540672302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7540621757507324, "epoch": 3.16, "learning_rate": 3.418427726120034e-05, "loss": 0.791, "step": 3742, "task_loss": 0.9764981865882874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7124232053756714, "epoch": 3.16, "learning_rate": 3.418005071851226e-05, "loss": 0.6411, "step": 3743, "task_loss": 0.8723115921020508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6217201948165894, "epoch": 3.16, "learning_rate": 3.4175824175824177e-05, "loss": 0.6833, "step": 3744, "task_loss": 1.2725998163223267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7923558354377747, "epoch": 3.17, "learning_rate": 3.4171597633136096e-05, "loss": 0.6733, "step": 3745, "task_loss": 0.701529860496521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4069065451622009, "epoch": 3.17, "learning_rate": 3.4167371090448016e-05, "loss": 0.8203, "step": 3746, "task_loss": 0.5882713794708252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7379698753356934, "epoch": 3.17, "learning_rate": 3.4163144547759936e-05, "loss": 0.6767, "step": 3747, "task_loss": 0.5868172645568848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5990483164787292, "epoch": 3.17, "learning_rate": 3.4158918005071856e-05, "loss": 0.6854, "step": 3748, "task_loss": 0.8165649771690369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7794822454452515, "epoch": 3.17, "learning_rate": 3.415469146238377e-05, "loss": 0.9131, "step": 3749, "task_loss": 0.5788837671279907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.587905764579773, "epoch": 3.17, "learning_rate": 3.415046491969569e-05, "loss": 0.5252, "step": 3750, "task_loss": 0.28707078099250793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3462509512901306, "epoch": 3.17, "learning_rate": 3.414623837700761e-05, "loss": 0.5549, "step": 3751, "task_loss": 0.26485949754714966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36292195320129395, "epoch": 3.17, "learning_rate": 3.414201183431953e-05, "loss": 0.5105, "step": 3752, "task_loss": 0.3786086142063141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6408847570419312, "epoch": 3.17, "learning_rate": 3.413778529163145e-05, "loss": 0.6511, "step": 3753, "task_loss": 0.6492927074432373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45942166447639465, "epoch": 3.17, "learning_rate": 3.413355874894337e-05, "loss": 0.6294, "step": 3754, "task_loss": 1.1160677671432495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5613938570022583, "epoch": 3.17, "learning_rate": 3.412933220625528e-05, "loss": 0.6821, "step": 3755, "task_loss": 0.9854567050933838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3202049136161804, "epoch": 3.17, "learning_rate": 3.41251056635672e-05, "loss": 0.6158, "step": 3756, "task_loss": 0.10007906705141068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4958118796348572, "epoch": 3.18, "learning_rate": 3.4120879120879126e-05, "loss": 0.6717, "step": 3757, "task_loss": 0.8764507174491882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.493516743183136, "epoch": 3.18, "learning_rate": 3.4116652578191046e-05, "loss": 0.5277, "step": 3758, "task_loss": 0.9716719388961792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5764971971511841, "epoch": 3.18, "learning_rate": 3.411242603550296e-05, "loss": 0.6181, "step": 3759, "task_loss": 1.2201251983642578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8238059282302856, "epoch": 3.18, "learning_rate": 3.410819949281488e-05, "loss": 0.8158, "step": 3760, "task_loss": 0.4241720139980316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4805119037628174, "epoch": 3.18, "learning_rate": 3.41039729501268e-05, "loss": 0.6478, "step": 3761, "task_loss": 0.4531811475753784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5111087560653687, "epoch": 3.18, "learning_rate": 3.409974640743872e-05, "loss": 0.5142, "step": 3762, "task_loss": 0.688961386680603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5467681288719177, "epoch": 3.18, "learning_rate": 3.409551986475064e-05, "loss": 0.6499, "step": 3763, "task_loss": 0.31389936804771423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6697627305984497, "epoch": 3.18, "learning_rate": 3.409129332206256e-05, "loss": 0.5545, "step": 3764, "task_loss": 1.1988121271133423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6300317645072937, "epoch": 3.18, "learning_rate": 3.408706677937447e-05, "loss": 0.7127, "step": 3765, "task_loss": 0.39688840508461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5210208892822266, "epoch": 3.18, "learning_rate": 3.408284023668639e-05, "loss": 0.5281, "step": 3766, "task_loss": 1.0124322175979614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8983035683631897, "epoch": 3.18, "learning_rate": 3.407861369399831e-05, "loss": 0.6345, "step": 3767, "task_loss": 1.0718694925308228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6712415218353271, "epoch": 3.19, "learning_rate": 3.407438715131023e-05, "loss": 0.4919, "step": 3768, "task_loss": 1.2112324237823486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9172640442848206, "epoch": 3.19, "learning_rate": 3.407016060862215e-05, "loss": 0.7208, "step": 3769, "task_loss": 0.9262511134147644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7018153667449951, "epoch": 3.19, "learning_rate": 3.406593406593407e-05, "loss": 0.792, "step": 3770, "task_loss": 0.3578963279724121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.744202196598053, "epoch": 3.19, "learning_rate": 3.406170752324598e-05, "loss": 0.8204, "step": 3771, "task_loss": 1.2293626070022583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34972304105758667, "epoch": 3.19, "learning_rate": 3.40574809805579e-05, "loss": 0.5734, "step": 3772, "task_loss": 0.7316970229148865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5744720697402954, "epoch": 3.19, "learning_rate": 3.405325443786982e-05, "loss": 0.8727, "step": 3773, "task_loss": 1.7128212451934814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.555213212966919, "epoch": 3.19, "learning_rate": 3.404902789518175e-05, "loss": 0.6076, "step": 3774, "task_loss": 1.0635849237442017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6892537474632263, "epoch": 3.19, "learning_rate": 3.404480135249366e-05, "loss": 0.7379, "step": 3775, "task_loss": 0.959374725818634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40325430035591125, "epoch": 3.19, "learning_rate": 3.404057480980558e-05, "loss": 0.6955, "step": 3776, "task_loss": 0.6035494804382324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4382266104221344, "epoch": 3.19, "learning_rate": 3.40363482671175e-05, "loss": 0.5323, "step": 3777, "task_loss": 0.7823032736778259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6094955801963806, "epoch": 3.19, "learning_rate": 3.4032121724429414e-05, "loss": 0.5933, "step": 3778, "task_loss": 0.29179009795188904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6489497423171997, "epoch": 3.19, "learning_rate": 3.402789518174134e-05, "loss": 0.6234, "step": 3779, "task_loss": 0.5940364599227905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5587323307991028, "epoch": 3.2, "learning_rate": 3.402366863905326e-05, "loss": 0.6553, "step": 3780, "task_loss": 1.3246991634368896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5941123962402344, "epoch": 3.2, "learning_rate": 3.401944209636517e-05, "loss": 0.7707, "step": 3781, "task_loss": 0.47114717960357666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2677892446517944, "epoch": 3.2, "learning_rate": 3.401521555367709e-05, "loss": 0.7269, "step": 3782, "task_loss": 2.1561338901519775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5927201509475708, "epoch": 3.2, "learning_rate": 3.401098901098901e-05, "loss": 0.6243, "step": 3783, "task_loss": 0.8887702822685242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5167626142501831, "epoch": 3.2, "learning_rate": 3.400676246830093e-05, "loss": 0.6644, "step": 3784, "task_loss": 0.4265687167644501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2229374647140503, "epoch": 3.2, "learning_rate": 3.400253592561285e-05, "loss": 0.6521, "step": 3785, "task_loss": 1.2462905645370483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6373627185821533, "epoch": 3.2, "learning_rate": 3.399830938292477e-05, "loss": 0.5816, "step": 3786, "task_loss": 0.30056482553482056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5628971457481384, "epoch": 3.2, "learning_rate": 3.399408284023669e-05, "loss": 0.4788, "step": 3787, "task_loss": 0.7161725759506226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2736262381076813, "epoch": 3.2, "learning_rate": 3.3989856297548604e-05, "loss": 0.6858, "step": 3788, "task_loss": 0.6323650479316711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6507004499435425, "epoch": 3.2, "learning_rate": 3.3985629754860524e-05, "loss": 0.5559, "step": 3789, "task_loss": 0.8334919810295105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7830895781517029, "epoch": 3.2, "learning_rate": 3.3981403212172444e-05, "loss": 0.7053, "step": 3790, "task_loss": 0.5140246748924255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5599095821380615, "epoch": 3.2, "learning_rate": 3.3977176669484364e-05, "loss": 0.5615, "step": 3791, "task_loss": 0.7034794092178345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5156378746032715, "epoch": 3.21, "learning_rate": 3.397295012679628e-05, "loss": 0.6541, "step": 3792, "task_loss": 0.47448819875717163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6824741959571838, "epoch": 3.21, "learning_rate": 3.39687235841082e-05, "loss": 0.7199, "step": 3793, "task_loss": 1.0980820655822754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5753838419914246, "epoch": 3.21, "learning_rate": 3.3964497041420116e-05, "loss": 0.6556, "step": 3794, "task_loss": 0.34962376952171326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7017492055892944, "epoch": 3.21, "learning_rate": 3.3960270498732036e-05, "loss": 0.5851, "step": 3795, "task_loss": 1.6766815185546875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5991634130477905, "epoch": 3.21, "learning_rate": 3.395604395604396e-05, "loss": 0.5041, "step": 3796, "task_loss": 0.90635085105896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5647245645523071, "epoch": 3.21, "learning_rate": 3.3951817413355875e-05, "loss": 0.5586, "step": 3797, "task_loss": 0.7481592297554016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5071923732757568, "epoch": 3.21, "learning_rate": 3.3947590870667795e-05, "loss": 0.6085, "step": 3798, "task_loss": 0.41574627161026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7173941135406494, "epoch": 3.21, "learning_rate": 3.3943364327979715e-05, "loss": 0.8112, "step": 3799, "task_loss": 0.42481133341789246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9913312792778015, "epoch": 3.21, "learning_rate": 3.393913778529163e-05, "loss": 0.7504, "step": 3800, "task_loss": 0.7806087136268616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6741906404495239, "epoch": 3.21, "learning_rate": 3.3934911242603554e-05, "loss": 0.772, "step": 3801, "task_loss": 0.6874298453330994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39445626735687256, "epoch": 3.21, "learning_rate": 3.3930684699915474e-05, "loss": 0.6312, "step": 3802, "task_loss": 0.8107923865318298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.776245653629303, "epoch": 3.21, "learning_rate": 3.3926458157227394e-05, "loss": 0.6365, "step": 3803, "task_loss": 0.5789065957069397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6302706003189087, "epoch": 3.22, "learning_rate": 3.392223161453931e-05, "loss": 0.7743, "step": 3804, "task_loss": 1.3697251081466675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6761274337768555, "epoch": 3.22, "learning_rate": 3.3918005071851226e-05, "loss": 0.8965, "step": 3805, "task_loss": 0.5946615934371948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9160383939743042, "epoch": 3.22, "learning_rate": 3.3913778529163146e-05, "loss": 0.6498, "step": 3806, "task_loss": 0.6681810617446899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45323342084884644, "epoch": 3.22, "learning_rate": 3.3909551986475066e-05, "loss": 0.5182, "step": 3807, "task_loss": 0.9610430002212524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5408672094345093, "epoch": 3.22, "learning_rate": 3.3905325443786986e-05, "loss": 0.5985, "step": 3808, "task_loss": 0.6297504901885986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.679121732711792, "epoch": 3.22, "learning_rate": 3.3901098901098905e-05, "loss": 0.7845, "step": 3809, "task_loss": 0.3974277973175049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5405347347259521, "epoch": 3.22, "learning_rate": 3.389687235841082e-05, "loss": 0.7444, "step": 3810, "task_loss": 0.9425486922264099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5738269090652466, "epoch": 3.22, "learning_rate": 3.389264581572274e-05, "loss": 0.6617, "step": 3811, "task_loss": 1.0415279865264893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3114665150642395, "epoch": 3.22, "learning_rate": 3.388841927303466e-05, "loss": 0.6941, "step": 3812, "task_loss": 0.24100200831890106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9643857479095459, "epoch": 3.22, "learning_rate": 3.388419273034658e-05, "loss": 0.7422, "step": 3813, "task_loss": 0.9303734302520752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2126593589782715, "epoch": 3.22, "learning_rate": 3.38799661876585e-05, "loss": 0.674, "step": 3814, "task_loss": 1.382112979888916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7923911809921265, "epoch": 3.22, "learning_rate": 3.387573964497042e-05, "loss": 0.7357, "step": 3815, "task_loss": 2.0474979877471924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8287831544876099, "epoch": 3.23, "learning_rate": 3.387151310228234e-05, "loss": 0.7787, "step": 3816, "task_loss": 0.9554091691970825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5383998155593872, "epoch": 3.23, "learning_rate": 3.386728655959425e-05, "loss": 0.6112, "step": 3817, "task_loss": 0.900924801826477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49445056915283203, "epoch": 3.23, "learning_rate": 3.3863060016906176e-05, "loss": 0.6665, "step": 3818, "task_loss": 0.993209719657898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.619189977645874, "epoch": 3.23, "learning_rate": 3.3858833474218096e-05, "loss": 0.5328, "step": 3819, "task_loss": 0.649749219417572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8845866322517395, "epoch": 3.23, "learning_rate": 3.385460693153001e-05, "loss": 0.7296, "step": 3820, "task_loss": 1.6628559827804565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.568413496017456, "epoch": 3.23, "learning_rate": 3.385038038884193e-05, "loss": 0.7596, "step": 3821, "task_loss": 0.7251441478729248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8410848379135132, "epoch": 3.23, "learning_rate": 3.384615384615385e-05, "loss": 0.6969, "step": 3822, "task_loss": 1.9016329050064087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6509816646575928, "epoch": 3.23, "learning_rate": 3.384192730346576e-05, "loss": 0.5989, "step": 3823, "task_loss": 0.6418876051902771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43573498725891113, "epoch": 3.23, "learning_rate": 3.383770076077769e-05, "loss": 0.6344, "step": 3824, "task_loss": 0.7386416792869568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5029503107070923, "epoch": 3.23, "learning_rate": 3.383347421808961e-05, "loss": 0.6811, "step": 3825, "task_loss": 0.2797193229198456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.555972695350647, "epoch": 3.23, "learning_rate": 3.382924767540152e-05, "loss": 0.5548, "step": 3826, "task_loss": 0.5922855138778687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9157826900482178, "epoch": 3.23, "learning_rate": 3.382502113271344e-05, "loss": 0.717, "step": 3827, "task_loss": 0.3756215572357178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.687616229057312, "epoch": 3.24, "learning_rate": 3.382079459002536e-05, "loss": 0.593, "step": 3828, "task_loss": 0.47909826040267944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36408451199531555, "epoch": 3.24, "learning_rate": 3.381656804733728e-05, "loss": 0.5341, "step": 3829, "task_loss": 0.3287026286125183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44365108013153076, "epoch": 3.24, "learning_rate": 3.38123415046492e-05, "loss": 0.6658, "step": 3830, "task_loss": 0.2332337200641632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40200281143188477, "epoch": 3.24, "learning_rate": 3.380811496196112e-05, "loss": 0.5684, "step": 3831, "task_loss": 0.5256891846656799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4989927411079407, "epoch": 3.24, "learning_rate": 3.380388841927304e-05, "loss": 0.5325, "step": 3832, "task_loss": 1.267116665840149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7334017157554626, "epoch": 3.24, "learning_rate": 3.379966187658495e-05, "loss": 0.56, "step": 3833, "task_loss": 0.5534391403198242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4674106538295746, "epoch": 3.24, "learning_rate": 3.379543533389687e-05, "loss": 0.6094, "step": 3834, "task_loss": 1.1380066871643066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5072059631347656, "epoch": 3.24, "learning_rate": 3.37912087912088e-05, "loss": 0.5302, "step": 3835, "task_loss": 1.4863840341567993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6701310873031616, "epoch": 3.24, "learning_rate": 3.378698224852071e-05, "loss": 0.645, "step": 3836, "task_loss": 0.9406919479370117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4624938368797302, "epoch": 3.24, "learning_rate": 3.378275570583263e-05, "loss": 0.5949, "step": 3837, "task_loss": 0.7320272922515869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48286527395248413, "epoch": 3.24, "learning_rate": 3.377852916314455e-05, "loss": 0.6212, "step": 3838, "task_loss": 0.4311377704143524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3031708896160126, "epoch": 3.24, "learning_rate": 3.3774302620456464e-05, "loss": 0.7132, "step": 3839, "task_loss": 0.2939806282520294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35981088876724243, "epoch": 3.25, "learning_rate": 3.377007607776838e-05, "loss": 0.4392, "step": 3840, "task_loss": 0.8133130669593811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8086020350456238, "epoch": 3.25, "learning_rate": 3.376584953508031e-05, "loss": 0.6996, "step": 3841, "task_loss": 1.0303314924240112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5664933919906616, "epoch": 3.25, "learning_rate": 3.376162299239222e-05, "loss": 0.5265, "step": 3842, "task_loss": 0.5581521987915039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.596442699432373, "epoch": 3.25, "learning_rate": 3.375739644970414e-05, "loss": 0.9495, "step": 3843, "task_loss": 1.3157193660736084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45182183384895325, "epoch": 3.25, "learning_rate": 3.375316990701606e-05, "loss": 0.6561, "step": 3844, "task_loss": 0.8621368408203125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5898299813270569, "epoch": 3.25, "learning_rate": 3.374894336432798e-05, "loss": 0.6432, "step": 3845, "task_loss": 0.6100964546203613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4014677405357361, "epoch": 3.25, "learning_rate": 3.37447168216399e-05, "loss": 0.5069, "step": 3846, "task_loss": 0.11786511540412903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8498715162277222, "epoch": 3.25, "learning_rate": 3.374049027895182e-05, "loss": 0.9378, "step": 3847, "task_loss": 0.8390326499938965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6927406787872314, "epoch": 3.25, "learning_rate": 3.373626373626374e-05, "loss": 0.8032, "step": 3848, "task_loss": 0.8705059289932251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7547593712806702, "epoch": 3.25, "learning_rate": 3.3732037193575654e-05, "loss": 0.7301, "step": 3849, "task_loss": 1.809188961982727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3883437514305115, "epoch": 3.25, "learning_rate": 3.3727810650887574e-05, "loss": 0.488, "step": 3850, "task_loss": 0.07945055514574051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9190082550048828, "epoch": 3.26, "learning_rate": 3.3723584108199494e-05, "loss": 0.5127, "step": 3851, "task_loss": 0.8374632596969604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5903159379959106, "epoch": 3.26, "learning_rate": 3.3719357565511413e-05, "loss": 0.7062, "step": 3852, "task_loss": 0.6006965041160583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4534945785999298, "epoch": 3.26, "learning_rate": 3.371513102282333e-05, "loss": 0.4788, "step": 3853, "task_loss": 0.24314014613628387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5452814102172852, "epoch": 3.26, "learning_rate": 3.371090448013525e-05, "loss": 0.8426, "step": 3854, "task_loss": 0.9421284794807434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9109559059143066, "epoch": 3.26, "learning_rate": 3.3706677937447166e-05, "loss": 0.698, "step": 3855, "task_loss": 1.0641039609909058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4101806879043579, "epoch": 3.26, "learning_rate": 3.3702451394759086e-05, "loss": 0.5488, "step": 3856, "task_loss": 1.55475652217865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3298285007476807, "epoch": 3.26, "learning_rate": 3.3698224852071005e-05, "loss": 0.6337, "step": 3857, "task_loss": 0.8242889046669006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7067632675170898, "epoch": 3.26, "learning_rate": 3.3693998309382925e-05, "loss": 0.6156, "step": 3858, "task_loss": 1.3002690076828003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6895588636398315, "epoch": 3.26, "learning_rate": 3.3689771766694845e-05, "loss": 0.5168, "step": 3859, "task_loss": 0.4281080663204193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4891388416290283, "epoch": 3.26, "learning_rate": 3.3685545224006765e-05, "loss": 0.633, "step": 3860, "task_loss": 0.5239247679710388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0667812824249268, "epoch": 3.26, "learning_rate": 3.3681318681318684e-05, "loss": 0.724, "step": 3861, "task_loss": 1.9683423042297363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44475460052490234, "epoch": 3.26, "learning_rate": 3.36770921386306e-05, "loss": 0.6531, "step": 3862, "task_loss": 0.3908093571662903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4851522445678711, "epoch": 3.27, "learning_rate": 3.3672865595942524e-05, "loss": 0.6904, "step": 3863, "task_loss": 0.46170082688331604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3814062774181366, "epoch": 3.27, "learning_rate": 3.3668639053254444e-05, "loss": 0.4736, "step": 3864, "task_loss": 0.6820520758628845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5738285183906555, "epoch": 3.27, "learning_rate": 3.3664412510566357e-05, "loss": 0.5907, "step": 3865, "task_loss": 0.7286311388015747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48386770486831665, "epoch": 3.27, "learning_rate": 3.3660185967878276e-05, "loss": 0.5048, "step": 3866, "task_loss": 0.5828297138214111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.026790976524353, "epoch": 3.27, "learning_rate": 3.3655959425190196e-05, "loss": 0.8365, "step": 3867, "task_loss": 0.7935538291931152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38497093319892883, "epoch": 3.27, "learning_rate": 3.3651732882502116e-05, "loss": 0.4633, "step": 3868, "task_loss": 0.2598867416381836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9358500838279724, "epoch": 3.27, "learning_rate": 3.3647506339814035e-05, "loss": 0.6402, "step": 3869, "task_loss": 1.0280195474624634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6167775988578796, "epoch": 3.27, "learning_rate": 3.3643279797125955e-05, "loss": 0.7177, "step": 3870, "task_loss": 0.9789909720420837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3970627188682556, "epoch": 3.27, "learning_rate": 3.363905325443787e-05, "loss": 0.6239, "step": 3871, "task_loss": 1.4325599670410156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9278029203414917, "epoch": 3.27, "learning_rate": 3.363482671174979e-05, "loss": 0.6757, "step": 3872, "task_loss": 1.0422202348709106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1656203269958496, "epoch": 3.27, "learning_rate": 3.363060016906171e-05, "loss": 0.8446, "step": 3873, "task_loss": 1.0028610229492188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.553748607635498, "epoch": 3.27, "learning_rate": 3.362637362637363e-05, "loss": 0.4543, "step": 3874, "task_loss": 0.4587751030921936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45819446444511414, "epoch": 3.28, "learning_rate": 3.362214708368555e-05, "loss": 0.7845, "step": 3875, "task_loss": 0.7192927598953247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7986859679222107, "epoch": 3.28, "learning_rate": 3.361792054099747e-05, "loss": 0.6381, "step": 3876, "task_loss": 0.9620726704597473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6307942867279053, "epoch": 3.28, "learning_rate": 3.3613693998309387e-05, "loss": 0.8671, "step": 3877, "task_loss": 0.48219799995422363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6521656513214111, "epoch": 3.28, "learning_rate": 3.36094674556213e-05, "loss": 0.5479, "step": 3878, "task_loss": 1.2433865070343018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0597290992736816, "epoch": 3.28, "learning_rate": 3.360524091293322e-05, "loss": 0.7366, "step": 3879, "task_loss": 0.36986494064331055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6747319102287292, "epoch": 3.28, "learning_rate": 3.3601014370245146e-05, "loss": 0.6707, "step": 3880, "task_loss": 2.446615219116211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.581082820892334, "epoch": 3.28, "learning_rate": 3.359678782755706e-05, "loss": 0.6838, "step": 3881, "task_loss": 0.8097153306007385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49912330508232117, "epoch": 3.28, "learning_rate": 3.359256128486898e-05, "loss": 0.595, "step": 3882, "task_loss": 0.3019488751888275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4520947337150574, "epoch": 3.28, "learning_rate": 3.35883347421809e-05, "loss": 0.4635, "step": 3883, "task_loss": 0.552654504776001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5838029384613037, "epoch": 3.28, "learning_rate": 3.358410819949281e-05, "loss": 0.6597, "step": 3884, "task_loss": 1.3630002737045288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31268203258514404, "epoch": 3.28, "learning_rate": 3.357988165680474e-05, "loss": 0.5935, "step": 3885, "task_loss": 0.06699737161397934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7134374380111694, "epoch": 3.28, "learning_rate": 3.357565511411666e-05, "loss": 0.719, "step": 3886, "task_loss": 0.5727769136428833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9589802622795105, "epoch": 3.29, "learning_rate": 3.357142857142857e-05, "loss": 0.7402, "step": 3887, "task_loss": 0.7933779954910278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9160937070846558, "epoch": 3.29, "learning_rate": 3.356720202874049e-05, "loss": 0.6912, "step": 3888, "task_loss": 1.1651965379714966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5411211252212524, "epoch": 3.29, "learning_rate": 3.356297548605241e-05, "loss": 0.7335, "step": 3889, "task_loss": 1.470801591873169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3677292764186859, "epoch": 3.29, "learning_rate": 3.355874894336433e-05, "loss": 0.6632, "step": 3890, "task_loss": 0.8832083344459534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41046008467674255, "epoch": 3.29, "learning_rate": 3.355452240067625e-05, "loss": 0.6883, "step": 3891, "task_loss": 0.33027833700180054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49018967151641846, "epoch": 3.29, "learning_rate": 3.355029585798817e-05, "loss": 0.554, "step": 3892, "task_loss": 0.7048411965370178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7225145101547241, "epoch": 3.29, "learning_rate": 3.354606931530009e-05, "loss": 0.548, "step": 3893, "task_loss": 1.3113082647323608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42464256286621094, "epoch": 3.29, "learning_rate": 3.3541842772612e-05, "loss": 0.6451, "step": 3894, "task_loss": 0.5623482465744019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5087553262710571, "epoch": 3.29, "learning_rate": 3.353761622992392e-05, "loss": 0.5388, "step": 3895, "task_loss": 0.5432758331298828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4386586546897888, "epoch": 3.29, "learning_rate": 3.353338968723584e-05, "loss": 0.6313, "step": 3896, "task_loss": 0.6271265745162964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9487965106964111, "epoch": 3.29, "learning_rate": 3.352916314454776e-05, "loss": 0.8178, "step": 3897, "task_loss": 1.5867424011230469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34835946559906006, "epoch": 3.29, "learning_rate": 3.352493660185968e-05, "loss": 0.637, "step": 3898, "task_loss": 1.0247727632522583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4320606291294098, "epoch": 3.3, "learning_rate": 3.35207100591716e-05, "loss": 0.6995, "step": 3899, "task_loss": 1.3939924240112305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49219024181365967, "epoch": 3.3, "learning_rate": 3.3516483516483513e-05, "loss": 0.6035, "step": 3900, "task_loss": 1.4756388664245605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3515661060810089, "epoch": 3.3, "learning_rate": 3.351225697379543e-05, "loss": 0.4611, "step": 3901, "task_loss": 0.5603144764900208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0203819274902344, "epoch": 3.3, "learning_rate": 3.350803043110736e-05, "loss": 0.7224, "step": 3902, "task_loss": 1.6708731651306152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4209211766719818, "epoch": 3.3, "learning_rate": 3.350380388841928e-05, "loss": 0.631, "step": 3903, "task_loss": 1.2993181943893433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7542859315872192, "epoch": 3.3, "learning_rate": 3.349957734573119e-05, "loss": 0.6682, "step": 3904, "task_loss": 1.2208454608917236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5419905185699463, "epoch": 3.3, "learning_rate": 3.349535080304311e-05, "loss": 0.5774, "step": 3905, "task_loss": 0.4915432035923004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7790452241897583, "epoch": 3.3, "learning_rate": 3.349112426035503e-05, "loss": 0.6745, "step": 3906, "task_loss": 0.6758826375007629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7102470993995667, "epoch": 3.3, "learning_rate": 3.348689771766695e-05, "loss": 0.723, "step": 3907, "task_loss": 1.0424365997314453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4168453514575958, "epoch": 3.3, "learning_rate": 3.348267117497887e-05, "loss": 0.7707, "step": 3908, "task_loss": 0.6616557240486145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5598769783973694, "epoch": 3.3, "learning_rate": 3.347844463229079e-05, "loss": 0.5769, "step": 3909, "task_loss": 0.7414443492889404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8472377061843872, "epoch": 3.3, "learning_rate": 3.3474218089602704e-05, "loss": 0.8324, "step": 3910, "task_loss": 0.6906710863113403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22270134091377258, "epoch": 3.31, "learning_rate": 3.3469991546914624e-05, "loss": 0.6163, "step": 3911, "task_loss": 0.08123055845499039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7566246390342712, "epoch": 3.31, "learning_rate": 3.3465765004226544e-05, "loss": 0.8558, "step": 3912, "task_loss": 0.8362683057785034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43627476692199707, "epoch": 3.31, "learning_rate": 3.346153846153846e-05, "loss": 0.5194, "step": 3913, "task_loss": 0.7641618847846985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35899990797042847, "epoch": 3.31, "learning_rate": 3.345731191885038e-05, "loss": 0.5453, "step": 3914, "task_loss": 0.14782261848449707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5322375297546387, "epoch": 3.31, "learning_rate": 3.34530853761623e-05, "loss": 0.4527, "step": 3915, "task_loss": 0.708094596862793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7115599513053894, "epoch": 3.31, "learning_rate": 3.3448858833474216e-05, "loss": 0.4722, "step": 3916, "task_loss": 1.097058892250061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4993571937084198, "epoch": 3.31, "learning_rate": 3.3444632290786135e-05, "loss": 0.7615, "step": 3917, "task_loss": 0.5412318706512451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6557318568229675, "epoch": 3.31, "learning_rate": 3.3440405748098055e-05, "loss": 0.703, "step": 3918, "task_loss": 0.5193818807601929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5269460082054138, "epoch": 3.31, "learning_rate": 3.343617920540998e-05, "loss": 0.5943, "step": 3919, "task_loss": 0.9898266792297363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3797948956489563, "epoch": 3.31, "learning_rate": 3.3431952662721895e-05, "loss": 0.6485, "step": 3920, "task_loss": 0.37862321734428406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0981862545013428, "epoch": 3.31, "learning_rate": 3.3427726120033814e-05, "loss": 1.0162, "step": 3921, "task_loss": 0.2785789966583252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6004927158355713, "epoch": 3.32, "learning_rate": 3.3423499577345734e-05, "loss": 0.6426, "step": 3922, "task_loss": 1.1608461141586304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7121838331222534, "epoch": 3.32, "learning_rate": 3.341927303465765e-05, "loss": 0.7206, "step": 3923, "task_loss": 1.717772364616394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8762622475624084, "epoch": 3.32, "learning_rate": 3.3415046491969574e-05, "loss": 0.6832, "step": 3924, "task_loss": 2.074665069580078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30707815289497375, "epoch": 3.32, "learning_rate": 3.341081994928149e-05, "loss": 0.7001, "step": 3925, "task_loss": 0.7765181064605713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6167467832565308, "epoch": 3.32, "learning_rate": 3.3406593406593406e-05, "loss": 0.6838, "step": 3926, "task_loss": 0.53131103515625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44397321343421936, "epoch": 3.32, "learning_rate": 3.3402366863905326e-05, "loss": 0.5643, "step": 3927, "task_loss": 0.3047277331352234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3708931803703308, "epoch": 3.32, "learning_rate": 3.3398140321217246e-05, "loss": 0.4935, "step": 3928, "task_loss": 0.38013702630996704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8187675476074219, "epoch": 3.32, "learning_rate": 3.3393913778529166e-05, "loss": 0.6628, "step": 3929, "task_loss": 0.5122108459472656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.649563193321228, "epoch": 3.32, "learning_rate": 3.3389687235841085e-05, "loss": 0.7425, "step": 3930, "task_loss": 0.8000655770301819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26284122467041016, "epoch": 3.32, "learning_rate": 3.3385460693153005e-05, "loss": 0.5002, "step": 3931, "task_loss": 0.17851315438747406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6204005479812622, "epoch": 3.32, "learning_rate": 3.3381234150464925e-05, "loss": 0.641, "step": 3932, "task_loss": 0.3291212022304535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.682673454284668, "epoch": 3.32, "learning_rate": 3.337700760777684e-05, "loss": 0.6991, "step": 3933, "task_loss": 1.032178521156311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6322749853134155, "epoch": 3.33, "learning_rate": 3.337278106508876e-05, "loss": 0.5521, "step": 3934, "task_loss": 0.7650724649429321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6840622425079346, "epoch": 3.33, "learning_rate": 3.336855452240068e-05, "loss": 0.7621, "step": 3935, "task_loss": 0.6678985953330994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5300444960594177, "epoch": 3.33, "learning_rate": 3.33643279797126e-05, "loss": 0.7967, "step": 3936, "task_loss": 1.1937285661697388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41465961933135986, "epoch": 3.33, "learning_rate": 3.336010143702452e-05, "loss": 0.5593, "step": 3937, "task_loss": 0.2977862060070038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5342230796813965, "epoch": 3.33, "learning_rate": 3.3355874894336436e-05, "loss": 0.6051, "step": 3938, "task_loss": 0.7423573732376099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4787595272064209, "epoch": 3.33, "learning_rate": 3.335164835164835e-05, "loss": 0.4094, "step": 3939, "task_loss": 0.6922428607940674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6558927893638611, "epoch": 3.33, "learning_rate": 3.334742180896027e-05, "loss": 0.71, "step": 3940, "task_loss": 0.267610102891922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42038998007774353, "epoch": 3.33, "learning_rate": 3.3343195266272196e-05, "loss": 0.7359, "step": 3941, "task_loss": 0.824495792388916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7075364589691162, "epoch": 3.33, "learning_rate": 3.333896872358411e-05, "loss": 0.6182, "step": 3942, "task_loss": 1.3665658235549927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49442434310913086, "epoch": 3.33, "learning_rate": 3.333474218089603e-05, "loss": 0.4703, "step": 3943, "task_loss": 0.14843980967998505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37423110008239746, "epoch": 3.33, "learning_rate": 3.333051563820795e-05, "loss": 0.491, "step": 3944, "task_loss": 1.5335365533828735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34091874957084656, "epoch": 3.33, "learning_rate": 3.332628909551986e-05, "loss": 0.5345, "step": 3945, "task_loss": 1.0860202312469482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6558480858802795, "epoch": 3.34, "learning_rate": 3.332206255283179e-05, "loss": 0.6812, "step": 3946, "task_loss": 0.796873927116394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6395263075828552, "epoch": 3.34, "learning_rate": 3.331783601014371e-05, "loss": 0.5819, "step": 3947, "task_loss": 0.4183849096298218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49171847105026245, "epoch": 3.34, "learning_rate": 3.331360946745563e-05, "loss": 0.575, "step": 3948, "task_loss": 0.684720516204834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.578411340713501, "epoch": 3.34, "learning_rate": 3.330938292476754e-05, "loss": 0.6977, "step": 3949, "task_loss": 0.5862065553665161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4539857506752014, "epoch": 3.34, "learning_rate": 3.330515638207946e-05, "loss": 0.778, "step": 3950, "task_loss": 0.4472378194332123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3729032278060913, "epoch": 3.34, "learning_rate": 3.330092983939138e-05, "loss": 0.2935, "step": 3951, "task_loss": 0.6985999941825867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0239622592926025, "epoch": 3.34, "learning_rate": 3.32967032967033e-05, "loss": 0.6693, "step": 3952, "task_loss": 1.0546976327896118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7052303552627563, "epoch": 3.34, "learning_rate": 3.329247675401522e-05, "loss": 0.7982, "step": 3953, "task_loss": 0.3319380283355713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8787857294082642, "epoch": 3.34, "learning_rate": 3.328825021132714e-05, "loss": 0.6226, "step": 3954, "task_loss": 0.36139535903930664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6386686563491821, "epoch": 3.34, "learning_rate": 3.328402366863905e-05, "loss": 0.6821, "step": 3955, "task_loss": 1.0463898181915283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5190571546554565, "epoch": 3.34, "learning_rate": 3.327979712595097e-05, "loss": 0.7957, "step": 3956, "task_loss": 0.6291264295578003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5444065928459167, "epoch": 3.34, "learning_rate": 3.327557058326289e-05, "loss": 0.6873, "step": 3957, "task_loss": 0.7291675806045532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5625632405281067, "epoch": 3.35, "learning_rate": 3.327134404057481e-05, "loss": 0.6632, "step": 3958, "task_loss": 0.9873447418212891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5395931005477905, "epoch": 3.35, "learning_rate": 3.326711749788673e-05, "loss": 0.7347, "step": 3959, "task_loss": 0.2736806273460388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4505186975002289, "epoch": 3.35, "learning_rate": 3.326289095519865e-05, "loss": 0.5042, "step": 3960, "task_loss": 0.051546353846788406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6125847101211548, "epoch": 3.35, "learning_rate": 3.325866441251057e-05, "loss": 0.7314, "step": 3961, "task_loss": 0.5110401511192322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6178805232048035, "epoch": 3.35, "learning_rate": 3.325443786982248e-05, "loss": 0.5301, "step": 3962, "task_loss": 0.6961491703987122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3957834839820862, "epoch": 3.35, "learning_rate": 3.325021132713441e-05, "loss": 0.5786, "step": 3963, "task_loss": 1.042910099029541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5367882251739502, "epoch": 3.35, "learning_rate": 3.324598478444633e-05, "loss": 0.6244, "step": 3964, "task_loss": 0.8038404583930969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9926853775978088, "epoch": 3.35, "learning_rate": 3.324175824175824e-05, "loss": 0.6856, "step": 3965, "task_loss": 0.7768417000770569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9465579986572266, "epoch": 3.35, "learning_rate": 3.323753169907016e-05, "loss": 0.5704, "step": 3966, "task_loss": 0.8470343351364136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9539152383804321, "epoch": 3.35, "learning_rate": 3.323330515638208e-05, "loss": 0.611, "step": 3967, "task_loss": 1.830016851425171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4759204089641571, "epoch": 3.35, "learning_rate": 3.3229078613693995e-05, "loss": 0.6842, "step": 3968, "task_loss": 1.0724446773529053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7830750942230225, "epoch": 3.35, "learning_rate": 3.322485207100592e-05, "loss": 0.724, "step": 3969, "task_loss": 1.2209429740905762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5183069109916687, "epoch": 3.36, "learning_rate": 3.322062552831784e-05, "loss": 0.5557, "step": 3970, "task_loss": 0.08554243296384811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7506176233291626, "epoch": 3.36, "learning_rate": 3.3216398985629754e-05, "loss": 0.7632, "step": 3971, "task_loss": 1.3869366645812988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7739876508712769, "epoch": 3.36, "learning_rate": 3.3212172442941674e-05, "loss": 0.652, "step": 3972, "task_loss": 1.096614956855774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46949848532676697, "epoch": 3.36, "learning_rate": 3.3207945900253593e-05, "loss": 0.6517, "step": 3973, "task_loss": 0.4659460186958313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7188059091567993, "epoch": 3.36, "learning_rate": 3.320371935756551e-05, "loss": 0.5684, "step": 3974, "task_loss": 1.0373523235321045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.312154620885849, "epoch": 3.36, "learning_rate": 3.319949281487743e-05, "loss": 0.5088, "step": 3975, "task_loss": 0.7555842399597168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8637068271636963, "epoch": 3.36, "learning_rate": 3.319526627218935e-05, "loss": 0.8029, "step": 3976, "task_loss": 0.5204150676727295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7043997049331665, "epoch": 3.36, "learning_rate": 3.319103972950127e-05, "loss": 0.6963, "step": 3977, "task_loss": 1.1344348192214966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5696607232093811, "epoch": 3.36, "learning_rate": 3.3186813186813185e-05, "loss": 0.7128, "step": 3978, "task_loss": 0.496666818857193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5712813138961792, "epoch": 3.36, "learning_rate": 3.3182586644125105e-05, "loss": 0.6678, "step": 3979, "task_loss": 0.6548811197280884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4539666473865509, "epoch": 3.36, "learning_rate": 3.317836010143703e-05, "loss": 0.6589, "step": 3980, "task_loss": 0.21650294959545135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44822144508361816, "epoch": 3.36, "learning_rate": 3.3174133558748945e-05, "loss": 0.6086, "step": 3981, "task_loss": 0.14732351899147034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4881581664085388, "epoch": 3.37, "learning_rate": 3.3169907016060864e-05, "loss": 0.4735, "step": 3982, "task_loss": 0.2254583090543747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5408546924591064, "epoch": 3.37, "learning_rate": 3.3165680473372784e-05, "loss": 0.658, "step": 3983, "task_loss": 0.8050487637519836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.81545090675354, "epoch": 3.37, "learning_rate": 3.31614539306847e-05, "loss": 0.72, "step": 3984, "task_loss": 2.734854221343994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8490267992019653, "epoch": 3.37, "learning_rate": 3.315722738799662e-05, "loss": 0.7237, "step": 3985, "task_loss": 0.630405843257904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5908295512199402, "epoch": 3.37, "learning_rate": 3.315300084530854e-05, "loss": 0.5789, "step": 3986, "task_loss": 1.5488214492797852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5881787538528442, "epoch": 3.37, "learning_rate": 3.3148774302620456e-05, "loss": 0.5039, "step": 3987, "task_loss": 0.4973008930683136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4499143660068512, "epoch": 3.37, "learning_rate": 3.3144547759932376e-05, "loss": 0.6232, "step": 3988, "task_loss": 0.30243533849716187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5703656673431396, "epoch": 3.37, "learning_rate": 3.3140321217244296e-05, "loss": 0.5047, "step": 3989, "task_loss": 0.8970152735710144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8677539825439453, "epoch": 3.37, "learning_rate": 3.3136094674556215e-05, "loss": 0.8253, "step": 3990, "task_loss": 0.9921838641166687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0659618377685547, "epoch": 3.37, "learning_rate": 3.3131868131868135e-05, "loss": 0.7674, "step": 3991, "task_loss": 0.7682436108589172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7066988945007324, "epoch": 3.37, "learning_rate": 3.3127641589180055e-05, "loss": 0.6248, "step": 3992, "task_loss": 0.6049737930297852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7294638752937317, "epoch": 3.38, "learning_rate": 3.3123415046491975e-05, "loss": 0.6554, "step": 3993, "task_loss": 0.4263124465942383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38612285256385803, "epoch": 3.38, "learning_rate": 3.311918850380389e-05, "loss": 0.6898, "step": 3994, "task_loss": 0.1594415307044983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6931972503662109, "epoch": 3.38, "learning_rate": 3.311496196111581e-05, "loss": 0.5274, "step": 3995, "task_loss": 1.0477615594863892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7712715864181519, "epoch": 3.38, "learning_rate": 3.311073541842773e-05, "loss": 0.7052, "step": 3996, "task_loss": 0.6277380585670471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8982274532318115, "epoch": 3.38, "learning_rate": 3.310650887573965e-05, "loss": 0.6824, "step": 3997, "task_loss": 1.4021918773651123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5366221070289612, "epoch": 3.38, "learning_rate": 3.3102282333051567e-05, "loss": 0.7017, "step": 3998, "task_loss": 0.7381435632705688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9963749647140503, "epoch": 3.38, "learning_rate": 3.3098055790363486e-05, "loss": 0.7746, "step": 3999, "task_loss": 0.5824484825134277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3503539264202118, "epoch": 3.38, "learning_rate": 3.30938292476754e-05, "loss": 0.4668, "step": 4000, "task_loss": 0.7994530200958252 }, { "epoch": 3.38, "eval_accuracy": 0.8938217821782178, "eval_loss": 0.40639403462409973, "eval_runtime": 229.9162, "eval_samples_per_second": 109.823, "eval_steps_per_second": 0.861, "step": 4000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6074734330177307, "epoch": 3.38, "learning_rate": 3.308960270498732e-05, "loss": 0.628, "step": 4001, "task_loss": 0.7503520846366882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.092028021812439, "epoch": 3.38, "learning_rate": 3.308537616229924e-05, "loss": 0.6776, "step": 4002, "task_loss": 1.0173532962799072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.865687370300293, "epoch": 3.38, "learning_rate": 3.308114961961116e-05, "loss": 0.5595, "step": 4003, "task_loss": 1.0401179790496826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5781496167182922, "epoch": 3.38, "learning_rate": 3.307692307692308e-05, "loss": 0.4993, "step": 4004, "task_loss": 0.6292144060134888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.077748417854309, "epoch": 3.39, "learning_rate": 3.3072696534235e-05, "loss": 0.6869, "step": 4005, "task_loss": 0.8932000994682312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6122187972068787, "epoch": 3.39, "learning_rate": 3.306846999154692e-05, "loss": 0.5102, "step": 4006, "task_loss": 0.7503175735473633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5466530323028564, "epoch": 3.39, "learning_rate": 3.306424344885883e-05, "loss": 0.8431, "step": 4007, "task_loss": 0.5734150409698486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7588575482368469, "epoch": 3.39, "learning_rate": 3.306001690617076e-05, "loss": 0.6754, "step": 4008, "task_loss": 0.8582702279090881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.607350766658783, "epoch": 3.39, "learning_rate": 3.305579036348268e-05, "loss": 0.6199, "step": 4009, "task_loss": 0.5086784958839417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4584023356437683, "epoch": 3.39, "learning_rate": 3.305156382079459e-05, "loss": 0.5779, "step": 4010, "task_loss": 0.5685446262359619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5342463850975037, "epoch": 3.39, "learning_rate": 3.304733727810651e-05, "loss": 0.534, "step": 4011, "task_loss": 0.6923866868019104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3588854968547821, "epoch": 3.39, "learning_rate": 3.304311073541843e-05, "loss": 0.4724, "step": 4012, "task_loss": 1.0098603963851929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6950870752334595, "epoch": 3.39, "learning_rate": 3.303888419273035e-05, "loss": 0.7341, "step": 4013, "task_loss": 2.0473179817199707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5922557711601257, "epoch": 3.39, "learning_rate": 3.303465765004227e-05, "loss": 0.6832, "step": 4014, "task_loss": 0.5787793397903442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6741911172866821, "epoch": 3.39, "learning_rate": 3.303043110735419e-05, "loss": 0.6301, "step": 4015, "task_loss": 0.7569441199302673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6243327856063843, "epoch": 3.39, "learning_rate": 3.30262045646661e-05, "loss": 0.5631, "step": 4016, "task_loss": 0.8289541602134705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5141885280609131, "epoch": 3.4, "learning_rate": 3.302197802197802e-05, "loss": 0.6306, "step": 4017, "task_loss": 0.2160712033510208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7404823303222656, "epoch": 3.4, "learning_rate": 3.301775147928994e-05, "loss": 0.7163, "step": 4018, "task_loss": 0.659570574760437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49293071031570435, "epoch": 3.4, "learning_rate": 3.301352493660186e-05, "loss": 0.7344, "step": 4019, "task_loss": 0.9586378335952759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8711064457893372, "epoch": 3.4, "learning_rate": 3.300929839391378e-05, "loss": 0.6297, "step": 4020, "task_loss": 0.40594664216041565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6690309047698975, "epoch": 3.4, "learning_rate": 3.30050718512257e-05, "loss": 0.6973, "step": 4021, "task_loss": 0.5502723455429077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5152108669281006, "epoch": 3.4, "learning_rate": 3.300084530853762e-05, "loss": 0.8188, "step": 4022, "task_loss": 0.34748587012290955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5054956674575806, "epoch": 3.4, "learning_rate": 3.299661876584953e-05, "loss": 0.6089, "step": 4023, "task_loss": 0.2677193880081177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.916710615158081, "epoch": 3.4, "learning_rate": 3.299239222316145e-05, "loss": 0.7993, "step": 4024, "task_loss": 0.7530816197395325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.424642950296402, "epoch": 3.4, "learning_rate": 3.298816568047338e-05, "loss": 0.6699, "step": 4025, "task_loss": 0.5044595003128052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5814586877822876, "epoch": 3.4, "learning_rate": 3.298393913778529e-05, "loss": 0.6204, "step": 4026, "task_loss": 0.7369222640991211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5689500570297241, "epoch": 3.4, "learning_rate": 3.297971259509721e-05, "loss": 0.8235, "step": 4027, "task_loss": 1.223319172859192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8197453022003174, "epoch": 3.4, "learning_rate": 3.297548605240913e-05, "loss": 0.7091, "step": 4028, "task_loss": 1.4007569551467896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6885471343994141, "epoch": 3.41, "learning_rate": 3.2971259509721045e-05, "loss": 0.5376, "step": 4029, "task_loss": 0.9437877535820007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49165111780166626, "epoch": 3.41, "learning_rate": 3.296703296703297e-05, "loss": 0.6809, "step": 4030, "task_loss": 0.46014800667762756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6432880163192749, "epoch": 3.41, "learning_rate": 3.296280642434489e-05, "loss": 0.7491, "step": 4031, "task_loss": 0.4910712242126465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1444894075393677, "epoch": 3.41, "learning_rate": 3.2958579881656804e-05, "loss": 0.7569, "step": 4032, "task_loss": 0.9607234001159668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48879674077033997, "epoch": 3.41, "learning_rate": 3.2954353338968724e-05, "loss": 0.5373, "step": 4033, "task_loss": 0.32777366042137146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.555480420589447, "epoch": 3.41, "learning_rate": 3.295012679628064e-05, "loss": 0.5601, "step": 4034, "task_loss": 1.6399238109588623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7140830159187317, "epoch": 3.41, "learning_rate": 3.294590025359256e-05, "loss": 0.7321, "step": 4035, "task_loss": 0.9857349395751953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8306131362915039, "epoch": 3.41, "learning_rate": 3.294167371090448e-05, "loss": 0.5821, "step": 4036, "task_loss": 1.1293448209762573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6218990087509155, "epoch": 3.41, "learning_rate": 3.29374471682164e-05, "loss": 0.6024, "step": 4037, "task_loss": 0.30195924639701843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9194777011871338, "epoch": 3.41, "learning_rate": 3.293322062552832e-05, "loss": 0.8282, "step": 4038, "task_loss": 1.2538440227508545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36070582270622253, "epoch": 3.41, "learning_rate": 3.2928994082840235e-05, "loss": 0.6345, "step": 4039, "task_loss": 0.9978319406509399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2422235906124115, "epoch": 3.41, "learning_rate": 3.2924767540152155e-05, "loss": 0.4458, "step": 4040, "task_loss": 0.45990389585494995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.826747477054596, "epoch": 3.42, "learning_rate": 3.2920540997464075e-05, "loss": 0.6636, "step": 4041, "task_loss": 1.0456844568252563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39871031045913696, "epoch": 3.42, "learning_rate": 3.2916314454775994e-05, "loss": 0.642, "step": 4042, "task_loss": 0.4895150065422058 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33663707971572876, "epoch": 3.42, "learning_rate": 3.2912087912087914e-05, "loss": 0.7944, "step": 4043, "task_loss": 0.8684514760971069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.614155650138855, "epoch": 3.42, "learning_rate": 3.2907861369399834e-05, "loss": 0.539, "step": 4044, "task_loss": 0.10875343531370163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6658488512039185, "epoch": 3.42, "learning_rate": 3.290363482671175e-05, "loss": 0.6066, "step": 4045, "task_loss": 0.1566886603832245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6465568542480469, "epoch": 3.42, "learning_rate": 3.2899408284023667e-05, "loss": 0.5563, "step": 4046, "task_loss": 1.0074589252471924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5670353770256042, "epoch": 3.42, "learning_rate": 3.289518174133559e-05, "loss": 0.6195, "step": 4047, "task_loss": 0.7071921825408936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4479367733001709, "epoch": 3.42, "learning_rate": 3.289095519864751e-05, "loss": 0.5335, "step": 4048, "task_loss": 0.3439406752586365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6772239208221436, "epoch": 3.42, "learning_rate": 3.2886728655959426e-05, "loss": 0.6037, "step": 4049, "task_loss": 0.45465779304504395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6022642254829407, "epoch": 3.42, "learning_rate": 3.2882502113271346e-05, "loss": 0.6563, "step": 4050, "task_loss": 1.0175983905792236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7284107208251953, "epoch": 3.42, "learning_rate": 3.2878275570583265e-05, "loss": 0.728, "step": 4051, "task_loss": 1.5928486585617065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5204176902770996, "epoch": 3.42, "learning_rate": 3.2874049027895185e-05, "loss": 0.6054, "step": 4052, "task_loss": 0.53810054063797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.628588855266571, "epoch": 3.43, "learning_rate": 3.2869822485207105e-05, "loss": 0.6286, "step": 4053, "task_loss": 0.6430641412734985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2044486999511719, "epoch": 3.43, "learning_rate": 3.2865595942519024e-05, "loss": 0.6788, "step": 4054, "task_loss": 0.9367953538894653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4022917151451111, "epoch": 3.43, "learning_rate": 3.286136939983094e-05, "loss": 0.6369, "step": 4055, "task_loss": 0.5606754422187805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5345295667648315, "epoch": 3.43, "learning_rate": 3.285714285714286e-05, "loss": 0.7832, "step": 4056, "task_loss": 0.8172202706336975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4476451873779297, "epoch": 3.43, "learning_rate": 3.285291631445478e-05, "loss": 0.4829, "step": 4057, "task_loss": 0.5786000490188599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.250101089477539, "epoch": 3.43, "learning_rate": 3.28486897717667e-05, "loss": 0.6598, "step": 4058, "task_loss": 0.8442792296409607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3972112238407135, "epoch": 3.43, "learning_rate": 3.2844463229078616e-05, "loss": 0.5063, "step": 4059, "task_loss": 0.8798096179962158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41983237862586975, "epoch": 3.43, "learning_rate": 3.2840236686390536e-05, "loss": 0.5357, "step": 4060, "task_loss": 0.4424249231815338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4093361794948578, "epoch": 3.43, "learning_rate": 3.283601014370245e-05, "loss": 0.5427, "step": 4061, "task_loss": 0.25447678565979004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3815116286277771, "epoch": 3.43, "learning_rate": 3.283178360101437e-05, "loss": 0.7215, "step": 4062, "task_loss": 0.7735743522644043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0876439809799194, "epoch": 3.43, "learning_rate": 3.282755705832629e-05, "loss": 0.8521, "step": 4063, "task_loss": 0.8553889989852905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47514504194259644, "epoch": 3.44, "learning_rate": 3.2823330515638215e-05, "loss": 0.748, "step": 4064, "task_loss": 0.2769138514995575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7730213403701782, "epoch": 3.44, "learning_rate": 3.281910397295013e-05, "loss": 0.5825, "step": 4065, "task_loss": 0.704494833946228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6209992170333862, "epoch": 3.44, "learning_rate": 3.281487743026205e-05, "loss": 0.5109, "step": 4066, "task_loss": 0.7182151675224304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6192642450332642, "epoch": 3.44, "learning_rate": 3.281065088757397e-05, "loss": 0.5397, "step": 4067, "task_loss": 1.446255087852478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2995608150959015, "epoch": 3.44, "learning_rate": 3.280642434488588e-05, "loss": 0.6474, "step": 4068, "task_loss": 0.5294321179389954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6721882820129395, "epoch": 3.44, "learning_rate": 3.280219780219781e-05, "loss": 0.5631, "step": 4069, "task_loss": 0.2013845592737198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6690796613693237, "epoch": 3.44, "learning_rate": 3.279797125950973e-05, "loss": 0.6919, "step": 4070, "task_loss": 0.8039246201515198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7115398049354553, "epoch": 3.44, "learning_rate": 3.279374471682164e-05, "loss": 0.6749, "step": 4071, "task_loss": 1.9238874912261963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41662031412124634, "epoch": 3.44, "learning_rate": 3.278951817413356e-05, "loss": 0.5418, "step": 4072, "task_loss": 1.279392123222351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6019200682640076, "epoch": 3.44, "learning_rate": 3.278529163144548e-05, "loss": 0.531, "step": 4073, "task_loss": 0.6550692915916443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7717655897140503, "epoch": 3.44, "learning_rate": 3.27810650887574e-05, "loss": 0.5532, "step": 4074, "task_loss": 0.8793482184410095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5803868174552917, "epoch": 3.44, "learning_rate": 3.277683854606932e-05, "loss": 0.6325, "step": 4075, "task_loss": 1.3391324281692505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29155969619750977, "epoch": 3.45, "learning_rate": 3.277261200338124e-05, "loss": 0.3528, "step": 4076, "task_loss": 0.1384962648153305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8540109992027283, "epoch": 3.45, "learning_rate": 3.276838546069316e-05, "loss": 0.5834, "step": 4077, "task_loss": 0.9989869594573975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5168101191520691, "epoch": 3.45, "learning_rate": 3.276415891800507e-05, "loss": 0.5211, "step": 4078, "task_loss": 0.8434225916862488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43424078822135925, "epoch": 3.45, "learning_rate": 3.275993237531699e-05, "loss": 0.578, "step": 4079, "task_loss": 0.5573917627334595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5935831069946289, "epoch": 3.45, "learning_rate": 3.275570583262891e-05, "loss": 0.7061, "step": 4080, "task_loss": 0.1950499415397644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5144053101539612, "epoch": 3.45, "learning_rate": 3.275147928994083e-05, "loss": 0.6545, "step": 4081, "task_loss": 1.4214547872543335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9065748453140259, "epoch": 3.45, "learning_rate": 3.274725274725275e-05, "loss": 0.8582, "step": 4082, "task_loss": 1.4179044961929321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4021805226802826, "epoch": 3.45, "learning_rate": 3.274302620456467e-05, "loss": 0.6486, "step": 4083, "task_loss": 0.3329828381538391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6248587369918823, "epoch": 3.45, "learning_rate": 3.273879966187658e-05, "loss": 0.8051, "step": 4084, "task_loss": 0.6296215653419495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5132864117622375, "epoch": 3.45, "learning_rate": 3.27345731191885e-05, "loss": 0.4438, "step": 4085, "task_loss": 0.9903122186660767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34761959314346313, "epoch": 3.45, "learning_rate": 3.273034657650043e-05, "loss": 0.557, "step": 4086, "task_loss": 0.6588301658630371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7301135063171387, "epoch": 3.45, "learning_rate": 3.272612003381234e-05, "loss": 0.5641, "step": 4087, "task_loss": 0.5226346850395203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6535298824310303, "epoch": 3.46, "learning_rate": 3.272189349112426e-05, "loss": 0.9017, "step": 4088, "task_loss": 0.8846993446350098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6151911020278931, "epoch": 3.46, "learning_rate": 3.271766694843618e-05, "loss": 0.4643, "step": 4089, "task_loss": 0.8631864786148071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6418994069099426, "epoch": 3.46, "learning_rate": 3.2713440405748094e-05, "loss": 0.6072, "step": 4090, "task_loss": 1.6342202425003052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6189375519752502, "epoch": 3.46, "learning_rate": 3.270921386306002e-05, "loss": 0.7721, "step": 4091, "task_loss": 0.5247688889503479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5908918380737305, "epoch": 3.46, "learning_rate": 3.270498732037194e-05, "loss": 0.802, "step": 4092, "task_loss": 0.6192498803138733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35019874572753906, "epoch": 3.46, "learning_rate": 3.270076077768386e-05, "loss": 0.5409, "step": 4093, "task_loss": 0.3839492201805115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27196240425109863, "epoch": 3.46, "learning_rate": 3.269653423499577e-05, "loss": 0.4499, "step": 4094, "task_loss": 0.21329045295715332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5424904823303223, "epoch": 3.46, "learning_rate": 3.269230769230769e-05, "loss": 0.636, "step": 4095, "task_loss": 1.7122716903686523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3017902374267578, "epoch": 3.46, "learning_rate": 3.268808114961961e-05, "loss": 0.6089, "step": 4096, "task_loss": 0.2441330999135971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43357884883880615, "epoch": 3.46, "learning_rate": 3.268385460693153e-05, "loss": 0.5503, "step": 4097, "task_loss": 0.25708678364753723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4622652232646942, "epoch": 3.46, "learning_rate": 3.267962806424345e-05, "loss": 0.4943, "step": 4098, "task_loss": 0.5686721801757812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1767756938934326, "epoch": 3.46, "learning_rate": 3.267540152155537e-05, "loss": 0.6719, "step": 4099, "task_loss": 0.7881876230239868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4500119686126709, "epoch": 3.47, "learning_rate": 3.2671174978867285e-05, "loss": 0.5455, "step": 4100, "task_loss": 0.3744850754737854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7269495725631714, "epoch": 3.47, "learning_rate": 3.2666948436179205e-05, "loss": 0.7929, "step": 4101, "task_loss": 0.3960084021091461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6521552801132202, "epoch": 3.47, "learning_rate": 3.2662721893491124e-05, "loss": 0.5437, "step": 4102, "task_loss": 0.45776593685150146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5061033368110657, "epoch": 3.47, "learning_rate": 3.2658495350803044e-05, "loss": 0.6641, "step": 4103, "task_loss": 1.0900051593780518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7579696178436279, "epoch": 3.47, "learning_rate": 3.2654268808114964e-05, "loss": 0.6505, "step": 4104, "task_loss": 1.058566927909851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6288526654243469, "epoch": 3.47, "learning_rate": 3.2650042265426884e-05, "loss": 0.6373, "step": 4105, "task_loss": 0.5247631072998047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5674715042114258, "epoch": 3.47, "learning_rate": 3.2645815722738803e-05, "loss": 0.6245, "step": 4106, "task_loss": 0.6395894289016724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37211471796035767, "epoch": 3.47, "learning_rate": 3.2641589180050716e-05, "loss": 0.5863, "step": 4107, "task_loss": 0.8074463605880737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8475644588470459, "epoch": 3.47, "learning_rate": 3.263736263736264e-05, "loss": 0.5912, "step": 4108, "task_loss": 0.6117957234382629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.014394760131836, "epoch": 3.47, "learning_rate": 3.263313609467456e-05, "loss": 0.6077, "step": 4109, "task_loss": 0.7403123378753662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5096998810768127, "epoch": 3.47, "learning_rate": 3.2628909551986476e-05, "loss": 0.4936, "step": 4110, "task_loss": 0.5338015556335449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7407160997390747, "epoch": 3.47, "learning_rate": 3.2624683009298395e-05, "loss": 0.7413, "step": 4111, "task_loss": 0.42836934328079224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6188712120056152, "epoch": 3.48, "learning_rate": 3.2620456466610315e-05, "loss": 0.5681, "step": 4112, "task_loss": 0.625019907951355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5124812126159668, "epoch": 3.48, "learning_rate": 3.261622992392223e-05, "loss": 0.4905, "step": 4113, "task_loss": 0.9299064874649048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3338650166988373, "epoch": 3.48, "learning_rate": 3.2612003381234155e-05, "loss": 0.5029, "step": 4114, "task_loss": 0.22932808101177216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5770798921585083, "epoch": 3.48, "learning_rate": 3.2607776838546074e-05, "loss": 0.5792, "step": 4115, "task_loss": 0.17936843633651733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7312511801719666, "epoch": 3.48, "learning_rate": 3.260355029585799e-05, "loss": 0.6715, "step": 4116, "task_loss": 1.1348507404327393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4917176067829132, "epoch": 3.48, "learning_rate": 3.259932375316991e-05, "loss": 0.5181, "step": 4117, "task_loss": 0.6452078223228455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6049743890762329, "epoch": 3.48, "learning_rate": 3.259509721048183e-05, "loss": 0.7414, "step": 4118, "task_loss": 0.9584172368049622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4489871561527252, "epoch": 3.48, "learning_rate": 3.2590870667793746e-05, "loss": 0.4471, "step": 4119, "task_loss": 1.2409334182739258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24057623744010925, "epoch": 3.48, "learning_rate": 3.2586644125105666e-05, "loss": 0.5333, "step": 4120, "task_loss": 0.9440165758132935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4532517194747925, "epoch": 3.48, "learning_rate": 3.2582417582417586e-05, "loss": 0.854, "step": 4121, "task_loss": 0.6270748376846313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8020159602165222, "epoch": 3.48, "learning_rate": 3.2578191039729506e-05, "loss": 0.6152, "step": 4122, "task_loss": 0.526718020439148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6842496395111084, "epoch": 3.48, "learning_rate": 3.257396449704142e-05, "loss": 0.6181, "step": 4123, "task_loss": 0.6086697578430176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5249884128570557, "epoch": 3.49, "learning_rate": 3.256973795435334e-05, "loss": 0.6199, "step": 4124, "task_loss": 0.33074983954429626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.051375150680542, "epoch": 3.49, "learning_rate": 3.2565511411665265e-05, "loss": 0.6707, "step": 4125, "task_loss": 0.9608281850814819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5947024822235107, "epoch": 3.49, "learning_rate": 3.256128486897718e-05, "loss": 0.5817, "step": 4126, "task_loss": 1.190467119216919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5592034459114075, "epoch": 3.49, "learning_rate": 3.25570583262891e-05, "loss": 0.6001, "step": 4127, "task_loss": 0.2505715489387512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6960796117782593, "epoch": 3.49, "learning_rate": 3.255283178360102e-05, "loss": 0.5672, "step": 4128, "task_loss": 0.39553961157798767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3609251976013184, "epoch": 3.49, "learning_rate": 3.254860524091293e-05, "loss": 0.8422, "step": 4129, "task_loss": 0.9063063263893127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.734248161315918, "epoch": 3.49, "learning_rate": 3.254437869822485e-05, "loss": 0.6301, "step": 4130, "task_loss": 0.5371847152709961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7986608743667603, "epoch": 3.49, "learning_rate": 3.2540152155536777e-05, "loss": 0.5571, "step": 4131, "task_loss": 1.0364172458648682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7163912057876587, "epoch": 3.49, "learning_rate": 3.253592561284869e-05, "loss": 0.4599, "step": 4132, "task_loss": 0.8118235468864441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6304093599319458, "epoch": 3.49, "learning_rate": 3.253169907016061e-05, "loss": 0.6808, "step": 4133, "task_loss": 0.6577768325805664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7390733957290649, "epoch": 3.49, "learning_rate": 3.252747252747253e-05, "loss": 0.7871, "step": 4134, "task_loss": 0.5037205815315247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43962255120277405, "epoch": 3.5, "learning_rate": 3.252324598478445e-05, "loss": 0.6081, "step": 4135, "task_loss": 0.6094099283218384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47717249393463135, "epoch": 3.5, "learning_rate": 3.251901944209637e-05, "loss": 0.5113, "step": 4136, "task_loss": 0.7854862809181213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6500998139381409, "epoch": 3.5, "learning_rate": 3.251479289940829e-05, "loss": 0.6567, "step": 4137, "task_loss": 0.5980322360992432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4689990282058716, "epoch": 3.5, "learning_rate": 3.251056635672021e-05, "loss": 0.6206, "step": 4138, "task_loss": 0.18691451847553253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6646602153778076, "epoch": 3.5, "learning_rate": 3.250633981403212e-05, "loss": 0.6217, "step": 4139, "task_loss": 0.484631210565567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5594324469566345, "epoch": 3.5, "learning_rate": 3.250211327134404e-05, "loss": 0.9303, "step": 4140, "task_loss": 0.44872263073921204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0829566717147827, "epoch": 3.5, "learning_rate": 3.249788672865596e-05, "loss": 0.7187, "step": 4141, "task_loss": 0.9525862336158752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7319719791412354, "epoch": 3.5, "learning_rate": 3.249366018596788e-05, "loss": 0.6668, "step": 4142, "task_loss": 1.2924727201461792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48716580867767334, "epoch": 3.5, "learning_rate": 3.24894336432798e-05, "loss": 0.5509, "step": 4143, "task_loss": 0.06743727624416351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5892049670219421, "epoch": 3.5, "learning_rate": 3.248520710059172e-05, "loss": 0.6057, "step": 4144, "task_loss": 0.7050666213035583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5103580355644226, "epoch": 3.5, "learning_rate": 3.248098055790363e-05, "loss": 0.478, "step": 4145, "task_loss": 0.5845856070518494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5487319827079773, "epoch": 3.5, "learning_rate": 3.247675401521555e-05, "loss": 0.6386, "step": 4146, "task_loss": 0.9110307097434998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8138513565063477, "epoch": 3.51, "learning_rate": 3.247252747252747e-05, "loss": 0.6887, "step": 4147, "task_loss": 0.7805132865905762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5571469068527222, "epoch": 3.51, "learning_rate": 3.246830092983939e-05, "loss": 0.6094, "step": 4148, "task_loss": 0.672821044921875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7280246019363403, "epoch": 3.51, "learning_rate": 3.246407438715131e-05, "loss": 0.523, "step": 4149, "task_loss": 0.2927163243293762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7121900320053101, "epoch": 3.51, "learning_rate": 3.245984784446323e-05, "loss": 0.7706, "step": 4150, "task_loss": 1.4445549249649048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5459108948707581, "epoch": 3.51, "learning_rate": 3.245562130177515e-05, "loss": 0.7939, "step": 4151, "task_loss": 0.28922948241233826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7208449840545654, "epoch": 3.51, "learning_rate": 3.2451394759087064e-05, "loss": 0.7225, "step": 4152, "task_loss": 0.22648075222969055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5601734519004822, "epoch": 3.51, "learning_rate": 3.244716821639899e-05, "loss": 0.6278, "step": 4153, "task_loss": 1.0026342868804932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6091493368148804, "epoch": 3.51, "learning_rate": 3.244294167371091e-05, "loss": 0.6187, "step": 4154, "task_loss": 0.5586197972297668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4073311686515808, "epoch": 3.51, "learning_rate": 3.243871513102282e-05, "loss": 0.4921, "step": 4155, "task_loss": 0.3951167166233063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5530438423156738, "epoch": 3.51, "learning_rate": 3.243448858833474e-05, "loss": 0.5994, "step": 4156, "task_loss": 0.595047116279602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7245615124702454, "epoch": 3.51, "learning_rate": 3.243026204564666e-05, "loss": 0.5904, "step": 4157, "task_loss": 0.5793409943580627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6666686534881592, "epoch": 3.51, "learning_rate": 3.242603550295858e-05, "loss": 0.707, "step": 4158, "task_loss": 1.3579907417297363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6762040853500366, "epoch": 3.52, "learning_rate": 3.24218089602705e-05, "loss": 0.5523, "step": 4159, "task_loss": 0.7119670510292053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9305746555328369, "epoch": 3.52, "learning_rate": 3.241758241758242e-05, "loss": 0.6398, "step": 4160, "task_loss": 1.2407652139663696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6372056603431702, "epoch": 3.52, "learning_rate": 3.2413355874894335e-05, "loss": 0.6543, "step": 4161, "task_loss": 1.2462353706359863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9108688235282898, "epoch": 3.52, "learning_rate": 3.2409129332206255e-05, "loss": 0.6203, "step": 4162, "task_loss": 1.2412667274475098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4291374385356903, "epoch": 3.52, "learning_rate": 3.2404902789518174e-05, "loss": 0.5399, "step": 4163, "task_loss": 0.767941415309906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.5653200149536133, "epoch": 3.52, "learning_rate": 3.2400676246830094e-05, "loss": 0.9669, "step": 4164, "task_loss": 1.1558754444122314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5701494216918945, "epoch": 3.52, "learning_rate": 3.2396449704142014e-05, "loss": 0.6029, "step": 4165, "task_loss": 1.1915533542633057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3344026207923889, "epoch": 3.52, "learning_rate": 3.2392223161453934e-05, "loss": 0.5296, "step": 4166, "task_loss": 0.39103928208351135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6021039485931396, "epoch": 3.52, "learning_rate": 3.238799661876585e-05, "loss": 0.7829, "step": 4167, "task_loss": 1.1129626035690308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5124154090881348, "epoch": 3.52, "learning_rate": 3.2383770076077766e-05, "loss": 0.6512, "step": 4168, "task_loss": 0.40392643213272095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.407939612865448, "epoch": 3.52, "learning_rate": 3.2379543533389686e-05, "loss": 0.4337, "step": 4169, "task_loss": 0.09862037003040314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6697307825088501, "epoch": 3.52, "learning_rate": 3.237531699070161e-05, "loss": 0.5467, "step": 4170, "task_loss": 0.7681911587715149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5678291320800781, "epoch": 3.53, "learning_rate": 3.2371090448013525e-05, "loss": 0.5375, "step": 4171, "task_loss": 0.2743608057498932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8003935813903809, "epoch": 3.53, "learning_rate": 3.2366863905325445e-05, "loss": 0.562, "step": 4172, "task_loss": 1.7369859218597412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8253204822540283, "epoch": 3.53, "learning_rate": 3.2362637362637365e-05, "loss": 0.814, "step": 4173, "task_loss": 2.1610679626464844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7380273342132568, "epoch": 3.53, "learning_rate": 3.235841081994928e-05, "loss": 0.6779, "step": 4174, "task_loss": 1.8532426357269287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42812979221343994, "epoch": 3.53, "learning_rate": 3.2354184277261204e-05, "loss": 0.566, "step": 4175, "task_loss": 0.7260921001434326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7297577857971191, "epoch": 3.53, "learning_rate": 3.2349957734573124e-05, "loss": 0.6703, "step": 4176, "task_loss": 0.7097510099411011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5491771697998047, "epoch": 3.53, "learning_rate": 3.234573119188504e-05, "loss": 0.6603, "step": 4177, "task_loss": 0.992276668548584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35628795623779297, "epoch": 3.53, "learning_rate": 3.234150464919696e-05, "loss": 0.5347, "step": 4178, "task_loss": 0.4289785623550415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7322530746459961, "epoch": 3.53, "learning_rate": 3.2337278106508877e-05, "loss": 0.5781, "step": 4179, "task_loss": 0.947498083114624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.655799150466919, "epoch": 3.53, "learning_rate": 3.2333051563820796e-05, "loss": 0.5234, "step": 4180, "task_loss": 0.5578104257583618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7058315277099609, "epoch": 3.53, "learning_rate": 3.2328825021132716e-05, "loss": 0.7592, "step": 4181, "task_loss": 1.0126543045043945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6310428380966187, "epoch": 3.53, "learning_rate": 3.2324598478444636e-05, "loss": 0.6544, "step": 4182, "task_loss": 1.3184713125228882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4339359700679779, "epoch": 3.54, "learning_rate": 3.2320371935756556e-05, "loss": 0.6474, "step": 4183, "task_loss": 0.09060013294219971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6512573957443237, "epoch": 3.54, "learning_rate": 3.231614539306847e-05, "loss": 0.5401, "step": 4184, "task_loss": 0.8618577718734741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5656225085258484, "epoch": 3.54, "learning_rate": 3.231191885038039e-05, "loss": 0.568, "step": 4185, "task_loss": 1.2756853103637695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6193535923957825, "epoch": 3.54, "learning_rate": 3.230769230769231e-05, "loss": 0.566, "step": 4186, "task_loss": 0.8474246263504028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7238597869873047, "epoch": 3.54, "learning_rate": 3.230346576500423e-05, "loss": 0.5672, "step": 4187, "task_loss": 0.5749729871749878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4649112820625305, "epoch": 3.54, "learning_rate": 3.229923922231615e-05, "loss": 0.6606, "step": 4188, "task_loss": 0.8154797554016113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5395984649658203, "epoch": 3.54, "learning_rate": 3.229501267962807e-05, "loss": 0.6846, "step": 4189, "task_loss": 0.9181526303291321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6296202540397644, "epoch": 3.54, "learning_rate": 3.229078613693998e-05, "loss": 0.6547, "step": 4190, "task_loss": 0.6031090021133423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4456971287727356, "epoch": 3.54, "learning_rate": 3.22865595942519e-05, "loss": 0.4878, "step": 4191, "task_loss": 0.14205634593963623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7604584693908691, "epoch": 3.54, "learning_rate": 3.2282333051563826e-05, "loss": 0.7052, "step": 4192, "task_loss": 1.1351755857467651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6406766176223755, "epoch": 3.54, "learning_rate": 3.2278106508875746e-05, "loss": 0.6171, "step": 4193, "task_loss": 1.1458464860916138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6606093645095825, "epoch": 3.54, "learning_rate": 3.227387996618766e-05, "loss": 0.5583, "step": 4194, "task_loss": 0.3046940863132477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48414623737335205, "epoch": 3.55, "learning_rate": 3.226965342349958e-05, "loss": 0.6198, "step": 4195, "task_loss": 1.0572696924209595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6292560696601868, "epoch": 3.55, "learning_rate": 3.22654268808115e-05, "loss": 0.5071, "step": 4196, "task_loss": 0.6799464821815491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5613049268722534, "epoch": 3.55, "learning_rate": 3.226120033812342e-05, "loss": 0.5547, "step": 4197, "task_loss": 0.6906691193580627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3893170654773712, "epoch": 3.55, "learning_rate": 3.225697379543534e-05, "loss": 0.5784, "step": 4198, "task_loss": 1.097582459449768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.531936764717102, "epoch": 3.55, "learning_rate": 3.225274725274726e-05, "loss": 0.5647, "step": 4199, "task_loss": 0.904606819152832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.70243239402771, "epoch": 3.55, "learning_rate": 3.224852071005917e-05, "loss": 0.6304, "step": 4200, "task_loss": 0.8562054634094238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5404973030090332, "epoch": 3.55, "learning_rate": 3.224429416737109e-05, "loss": 0.4272, "step": 4201, "task_loss": 0.38080593943595886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9328231811523438, "epoch": 3.55, "learning_rate": 3.224006762468301e-05, "loss": 0.6323, "step": 4202, "task_loss": 0.48349064588546753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.568303108215332, "epoch": 3.55, "learning_rate": 3.223584108199493e-05, "loss": 0.5869, "step": 4203, "task_loss": 0.361704558134079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7974933385848999, "epoch": 3.55, "learning_rate": 3.223161453930685e-05, "loss": 0.7818, "step": 4204, "task_loss": 0.9017975330352783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35864779353141785, "epoch": 3.55, "learning_rate": 3.222738799661877e-05, "loss": 0.5251, "step": 4205, "task_loss": 0.5652700662612915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8452371954917908, "epoch": 3.56, "learning_rate": 3.222316145393068e-05, "loss": 0.7821, "step": 4206, "task_loss": 0.7905057072639465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49725431203842163, "epoch": 3.56, "learning_rate": 3.22189349112426e-05, "loss": 0.6887, "step": 4207, "task_loss": 0.24928267300128937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6178297996520996, "epoch": 3.56, "learning_rate": 3.221470836855452e-05, "loss": 0.5617, "step": 4208, "task_loss": 0.4262554347515106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5190914869308472, "epoch": 3.56, "learning_rate": 3.221048182586645e-05, "loss": 0.604, "step": 4209, "task_loss": 0.7261759042739868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.473241925239563, "epoch": 3.56, "learning_rate": 3.220625528317836e-05, "loss": 0.6712, "step": 4210, "task_loss": 0.2535596787929535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.665992796421051, "epoch": 3.56, "learning_rate": 3.220202874049028e-05, "loss": 0.7544, "step": 4211, "task_loss": 0.8691056966781616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7985689043998718, "epoch": 3.56, "learning_rate": 3.21978021978022e-05, "loss": 0.6876, "step": 4212, "task_loss": 0.8100976347923279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5654987096786499, "epoch": 3.56, "learning_rate": 3.2193575655114114e-05, "loss": 0.609, "step": 4213, "task_loss": 0.7667639851570129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.638790488243103, "epoch": 3.56, "learning_rate": 3.218934911242604e-05, "loss": 0.6383, "step": 4214, "task_loss": 1.1682111024856567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7357028722763062, "epoch": 3.56, "learning_rate": 3.218512256973796e-05, "loss": 0.7164, "step": 4215, "task_loss": 0.844890832901001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4252817630767822, "epoch": 3.56, "learning_rate": 3.218089602704987e-05, "loss": 0.4598, "step": 4216, "task_loss": 0.43803659081459045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47045400738716125, "epoch": 3.56, "learning_rate": 3.217666948436179e-05, "loss": 0.6149, "step": 4217, "task_loss": 0.7341237664222717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46930402517318726, "epoch": 3.57, "learning_rate": 3.217244294167371e-05, "loss": 0.7054, "step": 4218, "task_loss": 0.5197510719299316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6082526445388794, "epoch": 3.57, "learning_rate": 3.216821639898563e-05, "loss": 0.5322, "step": 4219, "task_loss": 0.9213230609893799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5877562761306763, "epoch": 3.57, "learning_rate": 3.216398985629755e-05, "loss": 0.8263, "step": 4220, "task_loss": 1.1192339658737183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2452743649482727, "epoch": 3.57, "learning_rate": 3.215976331360947e-05, "loss": 0.5747, "step": 4221, "task_loss": 0.06740829348564148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4869162440299988, "epoch": 3.57, "learning_rate": 3.215553677092139e-05, "loss": 0.4561, "step": 4222, "task_loss": 0.8501110076904297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7156858444213867, "epoch": 3.57, "learning_rate": 3.2151310228233304e-05, "loss": 0.5241, "step": 4223, "task_loss": 0.9623615145683289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9896580576896667, "epoch": 3.57, "learning_rate": 3.2147083685545224e-05, "loss": 0.6587, "step": 4224, "task_loss": 1.3983900547027588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6754773855209351, "epoch": 3.57, "learning_rate": 3.2142857142857144e-05, "loss": 0.5931, "step": 4225, "task_loss": 1.2624531984329224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.463307648897171, "epoch": 3.57, "learning_rate": 3.2138630600169064e-05, "loss": 0.5911, "step": 4226, "task_loss": 1.2462469339370728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4898664951324463, "epoch": 3.57, "learning_rate": 3.2134404057480983e-05, "loss": 0.4909, "step": 4227, "task_loss": 0.5271729230880737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6329209804534912, "epoch": 3.57, "learning_rate": 3.21301775147929e-05, "loss": 0.5239, "step": 4228, "task_loss": 0.42427557706832886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8216217160224915, "epoch": 3.57, "learning_rate": 3.2125950972104816e-05, "loss": 0.8204, "step": 4229, "task_loss": 1.9188570976257324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6262178421020508, "epoch": 3.58, "learning_rate": 3.2121724429416736e-05, "loss": 0.5102, "step": 4230, "task_loss": 1.105137825012207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6141501665115356, "epoch": 3.58, "learning_rate": 3.211749788672866e-05, "loss": 0.5768, "step": 4231, "task_loss": 0.8140987753868103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3838309049606323, "epoch": 3.58, "learning_rate": 3.2113271344040575e-05, "loss": 0.5679, "step": 4232, "task_loss": 0.7179095149040222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5304633378982544, "epoch": 3.58, "learning_rate": 3.2109044801352495e-05, "loss": 0.6251, "step": 4233, "task_loss": 0.7526938319206238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3643147945404053, "epoch": 3.58, "learning_rate": 3.2104818258664415e-05, "loss": 0.504, "step": 4234, "task_loss": 0.3876301050186157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3744189739227295, "epoch": 3.58, "learning_rate": 3.210059171597633e-05, "loss": 0.658, "step": 4235, "task_loss": 0.9470755457878113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34365764260292053, "epoch": 3.58, "learning_rate": 3.2096365173288254e-05, "loss": 0.4737, "step": 4236, "task_loss": 0.3356644809246063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8528670072555542, "epoch": 3.58, "learning_rate": 3.2092138630600174e-05, "loss": 0.663, "step": 4237, "task_loss": 0.8852180242538452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35245010256767273, "epoch": 3.58, "learning_rate": 3.2087912087912094e-05, "loss": 0.5989, "step": 4238, "task_loss": 1.1525589227676392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6179569959640503, "epoch": 3.58, "learning_rate": 3.208368554522401e-05, "loss": 0.5201, "step": 4239, "task_loss": 0.2603001296520233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9398728609085083, "epoch": 3.58, "learning_rate": 3.2079459002535926e-05, "loss": 0.6746, "step": 4240, "task_loss": 1.5526063442230225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5932520627975464, "epoch": 3.58, "learning_rate": 3.2075232459847846e-05, "loss": 0.6777, "step": 4241, "task_loss": 0.7001931071281433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6230915784835815, "epoch": 3.59, "learning_rate": 3.2071005917159766e-05, "loss": 0.7681, "step": 4242, "task_loss": 1.1468700170516968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7679072618484497, "epoch": 3.59, "learning_rate": 3.2066779374471686e-05, "loss": 0.6807, "step": 4243, "task_loss": 0.41273894906044006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5656412243843079, "epoch": 3.59, "learning_rate": 3.2062552831783605e-05, "loss": 0.5988, "step": 4244, "task_loss": 0.8380447626113892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3785839080810547, "epoch": 3.59, "learning_rate": 3.205832628909552e-05, "loss": 0.6497, "step": 4245, "task_loss": 0.1459299921989441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4484691023826599, "epoch": 3.59, "learning_rate": 3.205409974640744e-05, "loss": 0.5873, "step": 4246, "task_loss": 0.13609357178211212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36376792192459106, "epoch": 3.59, "learning_rate": 3.204987320371936e-05, "loss": 0.5182, "step": 4247, "task_loss": 0.6277802586555481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6348499059677124, "epoch": 3.59, "learning_rate": 3.204564666103128e-05, "loss": 0.5093, "step": 4248, "task_loss": 0.3349606990814209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5473830699920654, "epoch": 3.59, "learning_rate": 3.20414201183432e-05, "loss": 0.5383, "step": 4249, "task_loss": 1.2277259826660156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46672943234443665, "epoch": 3.59, "learning_rate": 3.203719357565512e-05, "loss": 0.457, "step": 4250, "task_loss": 0.6045480966567993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7462354898452759, "epoch": 3.59, "learning_rate": 3.203296703296704e-05, "loss": 0.7498, "step": 4251, "task_loss": 0.43652093410491943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3947990834712982, "epoch": 3.59, "learning_rate": 3.202874049027895e-05, "loss": 0.4996, "step": 4252, "task_loss": 0.5876834392547607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4972449243068695, "epoch": 3.59, "learning_rate": 3.202451394759087e-05, "loss": 0.7479, "step": 4253, "task_loss": 2.7319962978363037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2912489175796509, "epoch": 3.6, "learning_rate": 3.2020287404902796e-05, "loss": 0.6889, "step": 4254, "task_loss": 0.9310650825500488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4191456437110901, "epoch": 3.6, "learning_rate": 3.201606086221471e-05, "loss": 0.5654, "step": 4255, "task_loss": 0.37675347924232483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.3826098442077637, "epoch": 3.6, "learning_rate": 3.201183431952663e-05, "loss": 0.7594, "step": 4256, "task_loss": 1.8409432172775269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8128854632377625, "epoch": 3.6, "learning_rate": 3.200760777683855e-05, "loss": 0.5836, "step": 4257, "task_loss": 0.6604255437850952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37107521295547485, "epoch": 3.6, "learning_rate": 3.200338123415046e-05, "loss": 0.4756, "step": 4258, "task_loss": 0.44623643159866333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6175744533538818, "epoch": 3.6, "learning_rate": 3.199915469146239e-05, "loss": 0.725, "step": 4259, "task_loss": 1.1219886541366577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8102858066558838, "epoch": 3.6, "learning_rate": 3.199492814877431e-05, "loss": 0.6319, "step": 4260, "task_loss": 0.6226069331169128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42583853006362915, "epoch": 3.6, "learning_rate": 3.199070160608622e-05, "loss": 0.5787, "step": 4261, "task_loss": 0.7398431897163391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4259260296821594, "epoch": 3.6, "learning_rate": 3.198647506339814e-05, "loss": 0.6635, "step": 4262, "task_loss": 0.5187358856201172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4843616485595703, "epoch": 3.6, "learning_rate": 3.198224852071006e-05, "loss": 0.6515, "step": 4263, "task_loss": 0.8118218779563904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30011484026908875, "epoch": 3.6, "learning_rate": 3.197802197802198e-05, "loss": 0.5075, "step": 4264, "task_loss": 0.024920698255300522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6588087677955627, "epoch": 3.6, "learning_rate": 3.19737954353339e-05, "loss": 0.543, "step": 4265, "task_loss": 0.7919685244560242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49749594926834106, "epoch": 3.61, "learning_rate": 3.196956889264582e-05, "loss": 0.4436, "step": 4266, "task_loss": 0.3057381510734558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6445580720901489, "epoch": 3.61, "learning_rate": 3.196534234995774e-05, "loss": 0.5012, "step": 4267, "task_loss": 0.8744177222251892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5139703750610352, "epoch": 3.61, "learning_rate": 3.196111580726965e-05, "loss": 0.5621, "step": 4268, "task_loss": 1.2391936779022217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.523512065410614, "epoch": 3.61, "learning_rate": 3.195688926458157e-05, "loss": 0.8205, "step": 4269, "task_loss": 0.42643898725509644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5993216633796692, "epoch": 3.61, "learning_rate": 3.195266272189349e-05, "loss": 0.5618, "step": 4270, "task_loss": 0.3088935315608978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8363982439041138, "epoch": 3.61, "learning_rate": 3.194843617920541e-05, "loss": 0.6916, "step": 4271, "task_loss": 2.10528302192688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5748842358589172, "epoch": 3.61, "learning_rate": 3.194420963651733e-05, "loss": 0.6422, "step": 4272, "task_loss": 1.529256820678711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5410435199737549, "epoch": 3.61, "learning_rate": 3.193998309382925e-05, "loss": 0.5835, "step": 4273, "task_loss": 0.9316344261169434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32317885756492615, "epoch": 3.61, "learning_rate": 3.1935756551141164e-05, "loss": 0.5987, "step": 4274, "task_loss": 0.921099066734314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5443952083587646, "epoch": 3.61, "learning_rate": 3.1931530008453083e-05, "loss": 0.5943, "step": 4275, "task_loss": 0.8989710211753845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6211710572242737, "epoch": 3.61, "learning_rate": 3.192730346576501e-05, "loss": 0.6788, "step": 4276, "task_loss": 0.5778294205665588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8003108501434326, "epoch": 3.61, "learning_rate": 3.192307692307692e-05, "loss": 0.6921, "step": 4277, "task_loss": 0.7750545740127563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43057748675346375, "epoch": 3.62, "learning_rate": 3.191885038038884e-05, "loss": 0.4917, "step": 4278, "task_loss": 0.2805824279785156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5174372792243958, "epoch": 3.62, "learning_rate": 3.191462383770076e-05, "loss": 0.6098, "step": 4279, "task_loss": 0.4504948556423187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4709424078464508, "epoch": 3.62, "learning_rate": 3.1910397295012675e-05, "loss": 0.5441, "step": 4280, "task_loss": 0.4343849718570709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9384723901748657, "epoch": 3.62, "learning_rate": 3.19061707523246e-05, "loss": 0.6999, "step": 4281, "task_loss": 1.215951681137085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6971558332443237, "epoch": 3.62, "learning_rate": 3.190194420963652e-05, "loss": 0.738, "step": 4282, "task_loss": 1.10869300365448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29778748750686646, "epoch": 3.62, "learning_rate": 3.189771766694844e-05, "loss": 0.4081, "step": 4283, "task_loss": 0.2580684721469879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6616436839103699, "epoch": 3.62, "learning_rate": 3.1893491124260354e-05, "loss": 0.4991, "step": 4284, "task_loss": 0.9533717036247253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37196701765060425, "epoch": 3.62, "learning_rate": 3.1889264581572274e-05, "loss": 0.6114, "step": 4285, "task_loss": 0.3012705147266388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9313027858734131, "epoch": 3.62, "learning_rate": 3.1885038038884194e-05, "loss": 0.595, "step": 4286, "task_loss": 0.8316695690155029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3201221525669098, "epoch": 3.62, "learning_rate": 3.1880811496196113e-05, "loss": 0.4636, "step": 4287, "task_loss": 0.3647851049900055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4864259362220764, "epoch": 3.62, "learning_rate": 3.187658495350803e-05, "loss": 0.5722, "step": 4288, "task_loss": 1.0231246948242188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7902079820632935, "epoch": 3.63, "learning_rate": 3.187235841081995e-05, "loss": 0.7221, "step": 4289, "task_loss": 0.8456388115882874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7198704481124878, "epoch": 3.63, "learning_rate": 3.1868131868131866e-05, "loss": 0.743, "step": 4290, "task_loss": 1.1135166883468628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5688356161117554, "epoch": 3.63, "learning_rate": 3.1863905325443786e-05, "loss": 0.8247, "step": 4291, "task_loss": 0.44530370831489563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6359323859214783, "epoch": 3.63, "learning_rate": 3.1859678782755705e-05, "loss": 0.6318, "step": 4292, "task_loss": 0.7541476488113403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6856552362442017, "epoch": 3.63, "learning_rate": 3.1855452240067625e-05, "loss": 0.7056, "step": 4293, "task_loss": 1.386191725730896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7288258075714111, "epoch": 3.63, "learning_rate": 3.1851225697379545e-05, "loss": 0.6303, "step": 4294, "task_loss": 1.078108787536621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4438767731189728, "epoch": 3.63, "learning_rate": 3.1846999154691465e-05, "loss": 0.4995, "step": 4295, "task_loss": 0.8345937728881836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5319458842277527, "epoch": 3.63, "learning_rate": 3.1842772612003384e-05, "loss": 0.5186, "step": 4296, "task_loss": 0.5466291308403015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42933404445648193, "epoch": 3.63, "learning_rate": 3.18385460693153e-05, "loss": 0.4203, "step": 4297, "task_loss": 0.537109911441803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5229090452194214, "epoch": 3.63, "learning_rate": 3.1834319526627224e-05, "loss": 0.6674, "step": 4298, "task_loss": 1.1762605905532837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41915687918663025, "epoch": 3.63, "learning_rate": 3.1830092983939144e-05, "loss": 0.4691, "step": 4299, "task_loss": 0.5037978887557983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4521094560623169, "epoch": 3.63, "learning_rate": 3.1825866441251057e-05, "loss": 0.6811, "step": 4300, "task_loss": 1.2478934526443481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5719486474990845, "epoch": 3.64, "learning_rate": 3.1821639898562976e-05, "loss": 0.6389, "step": 4301, "task_loss": 0.5874079465866089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5724771022796631, "epoch": 3.64, "learning_rate": 3.1817413355874896e-05, "loss": 0.6631, "step": 4302, "task_loss": 0.8944747447967529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6874879002571106, "epoch": 3.64, "learning_rate": 3.1813186813186816e-05, "loss": 0.633, "step": 4303, "task_loss": 1.3476438522338867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.721340537071228, "epoch": 3.64, "learning_rate": 3.1808960270498735e-05, "loss": 0.6187, "step": 4304, "task_loss": 0.5936524868011475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8770106434822083, "epoch": 3.64, "learning_rate": 3.1804733727810655e-05, "loss": 0.712, "step": 4305, "task_loss": 1.4871197938919067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40573886036872864, "epoch": 3.64, "learning_rate": 3.180050718512257e-05, "loss": 0.5775, "step": 4306, "task_loss": 0.27883994579315186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45438435673713684, "epoch": 3.64, "learning_rate": 3.179628064243449e-05, "loss": 0.4623, "step": 4307, "task_loss": 0.5053079724311829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30068594217300415, "epoch": 3.64, "learning_rate": 3.179205409974641e-05, "loss": 0.5082, "step": 4308, "task_loss": 0.220564067363739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41267597675323486, "epoch": 3.64, "learning_rate": 3.178782755705833e-05, "loss": 0.8542, "step": 4309, "task_loss": 1.4706257581710815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9790067672729492, "epoch": 3.64, "learning_rate": 3.178360101437025e-05, "loss": 0.7086, "step": 4310, "task_loss": 1.2004257440567017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.007163405418396, "epoch": 3.64, "learning_rate": 3.177937447168217e-05, "loss": 0.6727, "step": 4311, "task_loss": 0.6934151649475098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7979511022567749, "epoch": 3.64, "learning_rate": 3.177514792899409e-05, "loss": 0.6568, "step": 4312, "task_loss": 0.7616295218467712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5598003268241882, "epoch": 3.65, "learning_rate": 3.1770921386306e-05, "loss": 0.7707, "step": 4313, "task_loss": 0.5135715007781982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41841548681259155, "epoch": 3.65, "learning_rate": 3.176669484361792e-05, "loss": 0.6926, "step": 4314, "task_loss": 0.642728328704834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5688259601593018, "epoch": 3.65, "learning_rate": 3.1762468300929846e-05, "loss": 0.5396, "step": 4315, "task_loss": 0.8958868980407715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4546813368797302, "epoch": 3.65, "learning_rate": 3.175824175824176e-05, "loss": 0.5595, "step": 4316, "task_loss": 0.49206963181495667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6616531610488892, "epoch": 3.65, "learning_rate": 3.175401521555368e-05, "loss": 0.6062, "step": 4317, "task_loss": 1.2377668619155884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5755642652511597, "epoch": 3.65, "learning_rate": 3.17497886728656e-05, "loss": 0.6434, "step": 4318, "task_loss": 0.6238715648651123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35224103927612305, "epoch": 3.65, "learning_rate": 3.174556213017751e-05, "loss": 0.4863, "step": 4319, "task_loss": 0.45886775851249695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7452940940856934, "epoch": 3.65, "learning_rate": 3.174133558748944e-05, "loss": 0.6366, "step": 4320, "task_loss": 1.2794350385665894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.301813542842865, "epoch": 3.65, "learning_rate": 3.173710904480136e-05, "loss": 0.6275, "step": 4321, "task_loss": 0.7418091297149658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3648691177368164, "epoch": 3.65, "learning_rate": 3.173288250211327e-05, "loss": 0.6712, "step": 4322, "task_loss": 0.1716543734073639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8355896472930908, "epoch": 3.65, "learning_rate": 3.172865595942519e-05, "loss": 0.7054, "step": 4323, "task_loss": 1.4562668800354004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3395461440086365, "epoch": 3.65, "learning_rate": 3.172442941673711e-05, "loss": 0.582, "step": 4324, "task_loss": 0.5881152749061584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2862630784511566, "epoch": 3.66, "learning_rate": 3.172020287404903e-05, "loss": 0.4907, "step": 4325, "task_loss": 0.048797111958265305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5867465734481812, "epoch": 3.66, "learning_rate": 3.171597633136095e-05, "loss": 0.5474, "step": 4326, "task_loss": 1.4700002670288086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41917672753334045, "epoch": 3.66, "learning_rate": 3.171174978867287e-05, "loss": 0.5455, "step": 4327, "task_loss": 0.4365634620189667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45651277899742126, "epoch": 3.66, "learning_rate": 3.170752324598479e-05, "loss": 0.5857, "step": 4328, "task_loss": 0.3946208655834198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46523886919021606, "epoch": 3.66, "learning_rate": 3.17032967032967e-05, "loss": 0.5393, "step": 4329, "task_loss": 0.3044187128543854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0740121603012085, "epoch": 3.66, "learning_rate": 3.169907016060862e-05, "loss": 0.7448, "step": 4330, "task_loss": 1.0956947803497314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.526522696018219, "epoch": 3.66, "learning_rate": 3.169484361792054e-05, "loss": 0.56, "step": 4331, "task_loss": 0.28842079639434814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39969658851623535, "epoch": 3.66, "learning_rate": 3.169061707523246e-05, "loss": 0.4289, "step": 4332, "task_loss": 0.5743923187255859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9064669609069824, "epoch": 3.66, "learning_rate": 3.168639053254438e-05, "loss": 0.5589, "step": 4333, "task_loss": 0.5419033765792847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5803412199020386, "epoch": 3.66, "learning_rate": 3.16821639898563e-05, "loss": 0.5058, "step": 4334, "task_loss": 1.0003029108047485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4588584899902344, "epoch": 3.66, "learning_rate": 3.1677937447168214e-05, "loss": 0.7081, "step": 4335, "task_loss": 0.5820468068122864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39306706190109253, "epoch": 3.66, "learning_rate": 3.167371090448013e-05, "loss": 0.4372, "step": 4336, "task_loss": 0.1411595493555069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3232420086860657, "epoch": 3.67, "learning_rate": 3.166948436179206e-05, "loss": 0.6855, "step": 4337, "task_loss": 0.4553565979003906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9395290613174438, "epoch": 3.67, "learning_rate": 3.166525781910397e-05, "loss": 0.7082, "step": 4338, "task_loss": 0.8924969434738159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45405763387680054, "epoch": 3.67, "learning_rate": 3.166103127641589e-05, "loss": 0.389, "step": 4339, "task_loss": 0.06701885908842087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7120287418365479, "epoch": 3.67, "learning_rate": 3.165680473372781e-05, "loss": 0.749, "step": 4340, "task_loss": 0.7082578539848328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4376968741416931, "epoch": 3.67, "learning_rate": 3.165257819103973e-05, "loss": 0.7204, "step": 4341, "task_loss": 0.7110418677330017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3074084520339966, "epoch": 3.67, "learning_rate": 3.164835164835165e-05, "loss": 0.5375, "step": 4342, "task_loss": 0.2828150987625122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7828773856163025, "epoch": 3.67, "learning_rate": 3.164412510566357e-05, "loss": 0.6483, "step": 4343, "task_loss": 0.7823633551597595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5047447681427002, "epoch": 3.67, "learning_rate": 3.163989856297549e-05, "loss": 0.5744, "step": 4344, "task_loss": 1.5857350826263428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2383936047554016, "epoch": 3.67, "learning_rate": 3.1635672020287404e-05, "loss": 0.5245, "step": 4345, "task_loss": 0.18443283438682556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.375379353761673, "epoch": 3.67, "learning_rate": 3.1631445477599324e-05, "loss": 0.5501, "step": 4346, "task_loss": 0.6462752223014832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43102389574050903, "epoch": 3.67, "learning_rate": 3.1627218934911244e-05, "loss": 0.4801, "step": 4347, "task_loss": 0.4378305673599243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6170026659965515, "epoch": 3.67, "learning_rate": 3.162299239222316e-05, "loss": 0.7544, "step": 4348, "task_loss": 0.24095311760902405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9279892444610596, "epoch": 3.68, "learning_rate": 3.161876584953508e-05, "loss": 0.6585, "step": 4349, "task_loss": 1.3238933086395264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9275033473968506, "epoch": 3.68, "learning_rate": 3.1614539306847e-05, "loss": 0.6037, "step": 4350, "task_loss": 1.1025723218917847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7628381252288818, "epoch": 3.68, "learning_rate": 3.1610312764158916e-05, "loss": 0.6579, "step": 4351, "task_loss": 0.606580913066864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9433069229125977, "epoch": 3.68, "learning_rate": 3.1606086221470836e-05, "loss": 0.8221, "step": 4352, "task_loss": 0.8019368052482605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7382798790931702, "epoch": 3.68, "learning_rate": 3.1601859678782755e-05, "loss": 0.8307, "step": 4353, "task_loss": 1.1192035675048828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.545191764831543, "epoch": 3.68, "learning_rate": 3.159763313609468e-05, "loss": 0.8978, "step": 4354, "task_loss": 1.6121832132339478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6115432977676392, "epoch": 3.68, "learning_rate": 3.1593406593406595e-05, "loss": 0.7274, "step": 4355, "task_loss": 1.1889289617538452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40386080741882324, "epoch": 3.68, "learning_rate": 3.1589180050718514e-05, "loss": 0.597, "step": 4356, "task_loss": 1.165809154510498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5622716546058655, "epoch": 3.68, "learning_rate": 3.1584953508030434e-05, "loss": 0.5775, "step": 4357, "task_loss": 0.801180362701416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.625637412071228, "epoch": 3.68, "learning_rate": 3.158072696534235e-05, "loss": 0.5286, "step": 4358, "task_loss": 0.6454900503158569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3796696066856384, "epoch": 3.68, "learning_rate": 3.1576500422654274e-05, "loss": 0.5803, "step": 4359, "task_loss": 0.852182924747467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.580944299697876, "epoch": 3.69, "learning_rate": 3.1572273879966193e-05, "loss": 0.5197, "step": 4360, "task_loss": 0.41458451747894287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48113706707954407, "epoch": 3.69, "learning_rate": 3.1568047337278106e-05, "loss": 0.6058, "step": 4361, "task_loss": 0.5011580586433411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4710251986980438, "epoch": 3.69, "learning_rate": 3.1563820794590026e-05, "loss": 0.652, "step": 4362, "task_loss": 0.9010404348373413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6202372312545776, "epoch": 3.69, "learning_rate": 3.1559594251901946e-05, "loss": 0.5504, "step": 4363, "task_loss": 0.9609682559967041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.86912602186203, "epoch": 3.69, "learning_rate": 3.155536770921386e-05, "loss": 0.5964, "step": 4364, "task_loss": 0.5776647925376892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.114647626876831, "epoch": 3.69, "learning_rate": 3.1551141166525785e-05, "loss": 0.7971, "step": 4365, "task_loss": 0.8565689921379089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49282217025756836, "epoch": 3.69, "learning_rate": 3.1546914623837705e-05, "loss": 0.6618, "step": 4366, "task_loss": 1.132073163986206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0035711526870728, "epoch": 3.69, "learning_rate": 3.154268808114962e-05, "loss": 0.8887, "step": 4367, "task_loss": 1.2741198539733887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6079678535461426, "epoch": 3.69, "learning_rate": 3.153846153846154e-05, "loss": 0.5419, "step": 4368, "task_loss": 0.9336712956428528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.450817346572876, "epoch": 3.69, "learning_rate": 3.153423499577346e-05, "loss": 0.4887, "step": 4369, "task_loss": 0.6696841716766357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40493929386138916, "epoch": 3.69, "learning_rate": 3.153000845308538e-05, "loss": 0.6124, "step": 4370, "task_loss": 0.5956547260284424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7713926434516907, "epoch": 3.69, "learning_rate": 3.15257819103973e-05, "loss": 0.6891, "step": 4371, "task_loss": 0.187714621424675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4834434390068054, "epoch": 3.7, "learning_rate": 3.152155536770922e-05, "loss": 0.4901, "step": 4372, "task_loss": 0.9323393702507019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1184810400009155, "epoch": 3.7, "learning_rate": 3.1517328825021136e-05, "loss": 0.607, "step": 4373, "task_loss": 0.9886298179626465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4940279424190521, "epoch": 3.7, "learning_rate": 3.151310228233305e-05, "loss": 0.5571, "step": 4374, "task_loss": 0.1513417661190033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5817437171936035, "epoch": 3.7, "learning_rate": 3.150887573964497e-05, "loss": 0.6751, "step": 4375, "task_loss": 1.9388114213943481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3639531135559082, "epoch": 3.7, "learning_rate": 3.1504649196956896e-05, "loss": 0.6742, "step": 4376, "task_loss": 0.45520275831222534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40285754203796387, "epoch": 3.7, "learning_rate": 3.150042265426881e-05, "loss": 0.5499, "step": 4377, "task_loss": 0.9072644114494324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3399021029472351, "epoch": 3.7, "learning_rate": 3.149619611158073e-05, "loss": 0.7381, "step": 4378, "task_loss": 0.817537248134613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.465384304523468, "epoch": 3.7, "learning_rate": 3.149196956889265e-05, "loss": 0.5999, "step": 4379, "task_loss": 0.26480424404144287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.560734212398529, "epoch": 3.7, "learning_rate": 3.148774302620456e-05, "loss": 0.7969, "step": 4380, "task_loss": 0.5340701937675476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3710169196128845, "epoch": 3.7, "learning_rate": 3.148351648351648e-05, "loss": 0.6207, "step": 4381, "task_loss": 0.07768706232309341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3791731595993042, "epoch": 3.7, "learning_rate": 3.147928994082841e-05, "loss": 0.6673, "step": 4382, "task_loss": 0.15831103920936584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7198534607887268, "epoch": 3.7, "learning_rate": 3.147506339814033e-05, "loss": 0.7723, "step": 4383, "task_loss": 0.12946297228336334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8525928258895874, "epoch": 3.71, "learning_rate": 3.147083685545224e-05, "loss": 0.6921, "step": 4384, "task_loss": 0.4998167157173157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8171555995941162, "epoch": 3.71, "learning_rate": 3.146661031276416e-05, "loss": 0.6871, "step": 4385, "task_loss": 1.2481720447540283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7714055180549622, "epoch": 3.71, "learning_rate": 3.146238377007608e-05, "loss": 0.5675, "step": 4386, "task_loss": 0.1793496161699295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5537706613540649, "epoch": 3.71, "learning_rate": 3.1458157227388e-05, "loss": 0.629, "step": 4387, "task_loss": 1.3294854164123535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6943092346191406, "epoch": 3.71, "learning_rate": 3.145393068469992e-05, "loss": 0.6324, "step": 4388, "task_loss": 0.33173462748527527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7647502422332764, "epoch": 3.71, "learning_rate": 3.144970414201184e-05, "loss": 0.6857, "step": 4389, "task_loss": 1.4498182535171509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28095296025276184, "epoch": 3.71, "learning_rate": 3.144547759932375e-05, "loss": 0.4888, "step": 4390, "task_loss": 0.1757201850414276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.285271555185318, "epoch": 3.71, "learning_rate": 3.144125105663567e-05, "loss": 0.6081, "step": 4391, "task_loss": 1.0653576850891113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5226113796234131, "epoch": 3.71, "learning_rate": 3.143702451394759e-05, "loss": 0.6865, "step": 4392, "task_loss": 0.9720564484596252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6892796158790588, "epoch": 3.71, "learning_rate": 3.143279797125951e-05, "loss": 0.7721, "step": 4393, "task_loss": 0.23807276785373688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6205359101295471, "epoch": 3.71, "learning_rate": 3.142857142857143e-05, "loss": 0.6149, "step": 4394, "task_loss": 1.3027851581573486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4892859160900116, "epoch": 3.71, "learning_rate": 3.142434488588335e-05, "loss": 0.6674, "step": 4395, "task_loss": 0.780527651309967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28657805919647217, "epoch": 3.72, "learning_rate": 3.142011834319526e-05, "loss": 0.562, "step": 4396, "task_loss": 0.3991193473339081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7082350254058838, "epoch": 3.72, "learning_rate": 3.141589180050718e-05, "loss": 0.5793, "step": 4397, "task_loss": 0.8086472153663635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9290770292282104, "epoch": 3.72, "learning_rate": 3.14116652578191e-05, "loss": 0.6864, "step": 4398, "task_loss": 0.8588287234306335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0052218437194824, "epoch": 3.72, "learning_rate": 3.140743871513103e-05, "loss": 0.7908, "step": 4399, "task_loss": 1.4899024963378906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5374901294708252, "epoch": 3.72, "learning_rate": 3.140321217244294e-05, "loss": 0.5116, "step": 4400, "task_loss": 0.24133923649787903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5099197030067444, "epoch": 3.72, "learning_rate": 3.139898562975486e-05, "loss": 0.6579, "step": 4401, "task_loss": 1.5236730575561523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3611644208431244, "epoch": 3.72, "learning_rate": 3.139475908706678e-05, "loss": 0.6015, "step": 4402, "task_loss": 0.6340007185935974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40776538848876953, "epoch": 3.72, "learning_rate": 3.1390532544378695e-05, "loss": 0.5825, "step": 4403, "task_loss": 0.5617125034332275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5205349922180176, "epoch": 3.72, "learning_rate": 3.138630600169062e-05, "loss": 0.6563, "step": 4404, "task_loss": 0.34392574429512024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5175126791000366, "epoch": 3.72, "learning_rate": 3.138207945900254e-05, "loss": 0.5221, "step": 4405, "task_loss": 0.28904277086257935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.067955732345581, "epoch": 3.72, "learning_rate": 3.1377852916314454e-05, "loss": 0.8143, "step": 4406, "task_loss": 1.4027005434036255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2872017025947571, "epoch": 3.72, "learning_rate": 3.1373626373626374e-05, "loss": 0.6213, "step": 4407, "task_loss": 0.19852782785892487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5442214608192444, "epoch": 3.73, "learning_rate": 3.1369399830938293e-05, "loss": 0.6758, "step": 4408, "task_loss": 0.3480748236179352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5705583095550537, "epoch": 3.73, "learning_rate": 3.136517328825021e-05, "loss": 0.4024, "step": 4409, "task_loss": 1.0452461242675781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3976019322872162, "epoch": 3.73, "learning_rate": 3.136094674556213e-05, "loss": 0.8186, "step": 4410, "task_loss": 0.7618409395217896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6189587116241455, "epoch": 3.73, "learning_rate": 3.135672020287405e-05, "loss": 0.8592, "step": 4411, "task_loss": 0.8902673125267029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8908120393753052, "epoch": 3.73, "learning_rate": 3.135249366018597e-05, "loss": 0.6737, "step": 4412, "task_loss": 0.8862276673316956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5203185081481934, "epoch": 3.73, "learning_rate": 3.1348267117497885e-05, "loss": 0.4746, "step": 4413, "task_loss": 1.1344376802444458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6784696578979492, "epoch": 3.73, "learning_rate": 3.1344040574809805e-05, "loss": 0.6913, "step": 4414, "task_loss": 1.3508683443069458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7924786806106567, "epoch": 3.73, "learning_rate": 3.1339814032121725e-05, "loss": 0.607, "step": 4415, "task_loss": 0.5525851249694824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1014690399169922, "epoch": 3.73, "learning_rate": 3.1335587489433645e-05, "loss": 0.6748, "step": 4416, "task_loss": 0.7910175919532776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6909679174423218, "epoch": 3.73, "learning_rate": 3.1331360946745564e-05, "loss": 0.6901, "step": 4417, "task_loss": 1.2508430480957031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3092275559902191, "epoch": 3.73, "learning_rate": 3.1327134404057484e-05, "loss": 0.5479, "step": 4418, "task_loss": 0.1661626249551773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44045236706733704, "epoch": 3.73, "learning_rate": 3.13229078613694e-05, "loss": 0.4972, "step": 4419, "task_loss": 0.5092931389808655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6696663498878479, "epoch": 3.74, "learning_rate": 3.131868131868132e-05, "loss": 0.5663, "step": 4420, "task_loss": 0.7401490211486816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8323312401771545, "epoch": 3.74, "learning_rate": 3.131445477599324e-05, "loss": 0.5374, "step": 4421, "task_loss": 1.1042890548706055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6723310947418213, "epoch": 3.74, "learning_rate": 3.1310228233305156e-05, "loss": 0.63, "step": 4422, "task_loss": 1.2064276933670044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4610983729362488, "epoch": 3.74, "learning_rate": 3.1306001690617076e-05, "loss": 0.5279, "step": 4423, "task_loss": 1.6228435039520264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4889376759529114, "epoch": 3.74, "learning_rate": 3.1301775147928996e-05, "loss": 0.5157, "step": 4424, "task_loss": 0.6824313998222351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7890335917472839, "epoch": 3.74, "learning_rate": 3.129754860524091e-05, "loss": 0.6034, "step": 4425, "task_loss": 1.9725552797317505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4488603174686432, "epoch": 3.74, "learning_rate": 3.1293322062552835e-05, "loss": 0.5131, "step": 4426, "task_loss": 0.3517454266548157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5910731554031372, "epoch": 3.74, "learning_rate": 3.1289095519864755e-05, "loss": 0.4865, "step": 4427, "task_loss": 0.8392993211746216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6440855264663696, "epoch": 3.74, "learning_rate": 3.1284868977176675e-05, "loss": 0.7271, "step": 4428, "task_loss": 0.8623766303062439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6308737993240356, "epoch": 3.74, "learning_rate": 3.128064243448859e-05, "loss": 0.5544, "step": 4429, "task_loss": 0.3763124942779541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8419972062110901, "epoch": 3.74, "learning_rate": 3.127641589180051e-05, "loss": 0.8696, "step": 4430, "task_loss": 1.134678840637207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0648462772369385, "epoch": 3.75, "learning_rate": 3.127218934911243e-05, "loss": 0.7891, "step": 4431, "task_loss": 1.5075888633728027 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4411104619503021, "epoch": 3.75, "learning_rate": 3.126796280642435e-05, "loss": 0.5469, "step": 4432, "task_loss": 0.7082804441452026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8028160333633423, "epoch": 3.75, "learning_rate": 3.1263736263736267e-05, "loss": 0.7598, "step": 4433, "task_loss": 0.6093019843101501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5423300862312317, "epoch": 3.75, "learning_rate": 3.1259509721048186e-05, "loss": 0.564, "step": 4434, "task_loss": 0.3890649080276489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7079875469207764, "epoch": 3.75, "learning_rate": 3.12552831783601e-05, "loss": 0.6192, "step": 4435, "task_loss": 0.4845954179763794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5152788162231445, "epoch": 3.75, "learning_rate": 3.125105663567202e-05, "loss": 0.6525, "step": 4436, "task_loss": 0.7137448191642761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38127589225769043, "epoch": 3.75, "learning_rate": 3.124683009298394e-05, "loss": 0.5073, "step": 4437, "task_loss": 0.3960942029953003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.919884204864502, "epoch": 3.75, "learning_rate": 3.124260355029586e-05, "loss": 0.5951, "step": 4438, "task_loss": 0.6805700659751892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36860549449920654, "epoch": 3.75, "learning_rate": 3.123837700760778e-05, "loss": 0.5476, "step": 4439, "task_loss": 0.31832200288772583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7535417675971985, "epoch": 3.75, "learning_rate": 3.12341504649197e-05, "loss": 0.6445, "step": 4440, "task_loss": 1.4454289674758911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8517183065414429, "epoch": 3.75, "learning_rate": 3.122992392223162e-05, "loss": 0.9319, "step": 4441, "task_loss": 0.8230589628219604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5334685444831848, "epoch": 3.75, "learning_rate": 3.122569737954353e-05, "loss": 0.6219, "step": 4442, "task_loss": 0.42743802070617676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7406021952629089, "epoch": 3.76, "learning_rate": 3.122147083685546e-05, "loss": 0.6814, "step": 4443, "task_loss": 0.5760184526443481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4810228943824768, "epoch": 3.76, "learning_rate": 3.121724429416738e-05, "loss": 0.7459, "step": 4444, "task_loss": 1.1669907569885254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9073999524116516, "epoch": 3.76, "learning_rate": 3.121301775147929e-05, "loss": 0.5434, "step": 4445, "task_loss": 0.8882380723953247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4339657425880432, "epoch": 3.76, "learning_rate": 3.120879120879121e-05, "loss": 0.6492, "step": 4446, "task_loss": 0.2799747586250305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.941970705986023, "epoch": 3.76, "learning_rate": 3.120456466610313e-05, "loss": 0.7707, "step": 4447, "task_loss": 0.6933077573776245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7601891756057739, "epoch": 3.76, "learning_rate": 3.120033812341505e-05, "loss": 0.678, "step": 4448, "task_loss": 1.7052204608917236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.188031405210495, "epoch": 3.76, "learning_rate": 3.119611158072697e-05, "loss": 0.4625, "step": 4449, "task_loss": 0.014701290987432003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7593672275543213, "epoch": 3.76, "learning_rate": 3.119188503803889e-05, "loss": 0.657, "step": 4450, "task_loss": 1.521148920059204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6656076908111572, "epoch": 3.76, "learning_rate": 3.11876584953508e-05, "loss": 0.6235, "step": 4451, "task_loss": 0.9446977376937866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1436831951141357, "epoch": 3.76, "learning_rate": 3.118343195266272e-05, "loss": 0.6096, "step": 4452, "task_loss": 0.6983094811439514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9589691162109375, "epoch": 3.76, "learning_rate": 3.117920540997464e-05, "loss": 0.5742, "step": 4453, "task_loss": 0.5564941167831421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4245166480541229, "epoch": 3.76, "learning_rate": 3.117497886728656e-05, "loss": 0.3938, "step": 4454, "task_loss": 0.5738080739974976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7556803226470947, "epoch": 3.77, "learning_rate": 3.117075232459848e-05, "loss": 0.659, "step": 4455, "task_loss": 0.25264406204223633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.588709831237793, "epoch": 3.77, "learning_rate": 3.11665257819104e-05, "loss": 0.6914, "step": 4456, "task_loss": 0.8855775594711304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6776687502861023, "epoch": 3.77, "learning_rate": 3.116229923922232e-05, "loss": 0.4626, "step": 4457, "task_loss": 0.5914424657821655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48696398735046387, "epoch": 3.77, "learning_rate": 3.115807269653423e-05, "loss": 0.7374, "step": 4458, "task_loss": 0.5321323871612549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2600225806236267, "epoch": 3.77, "learning_rate": 3.115384615384615e-05, "loss": 0.484, "step": 4459, "task_loss": 0.46346914768218994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4186503291130066, "epoch": 3.77, "learning_rate": 3.114961961115808e-05, "loss": 0.4702, "step": 4460, "task_loss": 0.8028692007064819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4472455084323883, "epoch": 3.77, "learning_rate": 3.114539306846999e-05, "loss": 0.5175, "step": 4461, "task_loss": 0.5751044154167175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3327191472053528, "epoch": 3.77, "learning_rate": 3.114116652578191e-05, "loss": 0.5316, "step": 4462, "task_loss": 0.2516881227493286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9260343313217163, "epoch": 3.77, "learning_rate": 3.113693998309383e-05, "loss": 0.676, "step": 4463, "task_loss": 0.5999809503555298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6782312989234924, "epoch": 3.77, "learning_rate": 3.1132713440405745e-05, "loss": 0.752, "step": 4464, "task_loss": 1.2817986011505127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6236807703971863, "epoch": 3.77, "learning_rate": 3.112848689771767e-05, "loss": 0.7073, "step": 4465, "task_loss": 0.9672214984893799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5089489221572876, "epoch": 3.77, "learning_rate": 3.112426035502959e-05, "loss": 0.5038, "step": 4466, "task_loss": 0.08270702511072159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.432626873254776, "epoch": 3.78, "learning_rate": 3.1120033812341504e-05, "loss": 0.5698, "step": 4467, "task_loss": 0.45906898379325867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4977017939090729, "epoch": 3.78, "learning_rate": 3.1115807269653424e-05, "loss": 0.5934, "step": 4468, "task_loss": 0.29010775685310364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1058757305145264, "epoch": 3.78, "learning_rate": 3.111158072696534e-05, "loss": 0.7239, "step": 4469, "task_loss": 2.229424476623535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5502418279647827, "epoch": 3.78, "learning_rate": 3.110735418427726e-05, "loss": 0.5335, "step": 4470, "task_loss": 0.08442430943250656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34887951612472534, "epoch": 3.78, "learning_rate": 3.110312764158918e-05, "loss": 0.5992, "step": 4471, "task_loss": 0.4855583608150482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6222760081291199, "epoch": 3.78, "learning_rate": 3.10989010989011e-05, "loss": 0.6416, "step": 4472, "task_loss": 0.48976996541023254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34236112236976624, "epoch": 3.78, "learning_rate": 3.109467455621302e-05, "loss": 0.4734, "step": 4473, "task_loss": 0.497106671333313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47770261764526367, "epoch": 3.78, "learning_rate": 3.1090448013524935e-05, "loss": 0.4986, "step": 4474, "task_loss": 0.6153969168663025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.685827374458313, "epoch": 3.78, "learning_rate": 3.1086221470836855e-05, "loss": 0.5499, "step": 4475, "task_loss": 0.11037730425596237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29985013604164124, "epoch": 3.78, "learning_rate": 3.1081994928148775e-05, "loss": 0.6297, "step": 4476, "task_loss": 0.8097463846206665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7170051336288452, "epoch": 3.78, "learning_rate": 3.1077768385460694e-05, "loss": 0.6078, "step": 4477, "task_loss": 0.8717119693756104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7694337368011475, "epoch": 3.78, "learning_rate": 3.1073541842772614e-05, "loss": 0.6077, "step": 4478, "task_loss": 1.08436918258667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3849923610687256, "epoch": 3.79, "learning_rate": 3.1069315300084534e-05, "loss": 0.4593, "step": 4479, "task_loss": 0.4722740650177002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5677958726882935, "epoch": 3.79, "learning_rate": 3.106508875739645e-05, "loss": 0.5593, "step": 4480, "task_loss": 0.15746158361434937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7945002317428589, "epoch": 3.79, "learning_rate": 3.1060862214708367e-05, "loss": 0.608, "step": 4481, "task_loss": 0.7113239169120789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5034031867980957, "epoch": 3.79, "learning_rate": 3.105663567202029e-05, "loss": 0.5462, "step": 4482, "task_loss": 1.173659086227417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36249810457229614, "epoch": 3.79, "learning_rate": 3.1052409129332206e-05, "loss": 0.4536, "step": 4483, "task_loss": 0.055759161710739136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5031832456588745, "epoch": 3.79, "learning_rate": 3.1048182586644126e-05, "loss": 0.5148, "step": 4484, "task_loss": 0.9738038778305054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5841338634490967, "epoch": 3.79, "learning_rate": 3.1043956043956046e-05, "loss": 0.6619, "step": 4485, "task_loss": 0.8691191673278809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7698627710342407, "epoch": 3.79, "learning_rate": 3.1039729501267965e-05, "loss": 0.5726, "step": 4486, "task_loss": 1.609784483909607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7682676315307617, "epoch": 3.79, "learning_rate": 3.1035502958579885e-05, "loss": 0.5144, "step": 4487, "task_loss": 1.391501545906067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7411738038063049, "epoch": 3.79, "learning_rate": 3.1031276415891805e-05, "loss": 0.6006, "step": 4488, "task_loss": 1.196273684501648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4277286231517792, "epoch": 3.79, "learning_rate": 3.1027049873203725e-05, "loss": 0.5148, "step": 4489, "task_loss": 0.6398351788520813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4339134097099304, "epoch": 3.79, "learning_rate": 3.102282333051564e-05, "loss": 0.5867, "step": 4490, "task_loss": 0.987057626247406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39959561824798584, "epoch": 3.8, "learning_rate": 3.101859678782756e-05, "loss": 0.4816, "step": 4491, "task_loss": 0.6280518770217896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7162389755249023, "epoch": 3.8, "learning_rate": 3.101437024513948e-05, "loss": 0.7001, "step": 4492, "task_loss": 1.1401653289794922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7851250767707825, "epoch": 3.8, "learning_rate": 3.10101437024514e-05, "loss": 0.6459, "step": 4493, "task_loss": 1.1911137104034424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.635193407535553, "epoch": 3.8, "learning_rate": 3.1005917159763316e-05, "loss": 0.6207, "step": 4494, "task_loss": 0.6690196394920349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8584632873535156, "epoch": 3.8, "learning_rate": 3.1001690617075236e-05, "loss": 0.5074, "step": 4495, "task_loss": 0.7543220520019531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8067877292633057, "epoch": 3.8, "learning_rate": 3.099746407438715e-05, "loss": 0.6872, "step": 4496, "task_loss": 0.7726377248764038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5504999160766602, "epoch": 3.8, "learning_rate": 3.099323753169907e-05, "loss": 0.5616, "step": 4497, "task_loss": 0.6769705414772034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.522042453289032, "epoch": 3.8, "learning_rate": 3.098901098901099e-05, "loss": 0.587, "step": 4498, "task_loss": 0.2908691465854645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43705064058303833, "epoch": 3.8, "learning_rate": 3.0984784446322915e-05, "loss": 0.6191, "step": 4499, "task_loss": 0.2567892074584961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5280297994613647, "epoch": 3.8, "learning_rate": 3.098055790363483e-05, "loss": 0.6967, "step": 4500, "task_loss": 1.2133866548538208 }, { "epoch": 3.8, "eval_accuracy": 0.8985742574257426, "eval_loss": 0.38136711716651917, "eval_runtime": 227.6852, "eval_samples_per_second": 110.899, "eval_steps_per_second": 0.87, "step": 4500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8794289827346802, "epoch": 3.8, "learning_rate": 3.097633136094675e-05, "loss": 0.7024, "step": 4501, "task_loss": 0.5016676783561707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5788029432296753, "epoch": 3.81, "learning_rate": 3.097210481825867e-05, "loss": 0.5417, "step": 4502, "task_loss": 0.4744543433189392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42210519313812256, "epoch": 3.81, "learning_rate": 3.096787827557058e-05, "loss": 0.4941, "step": 4503, "task_loss": 0.48587310314178467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4772506058216095, "epoch": 3.81, "learning_rate": 3.096365173288251e-05, "loss": 0.5663, "step": 4504, "task_loss": 0.3254431486129761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44926780462265015, "epoch": 3.81, "learning_rate": 3.095942519019443e-05, "loss": 0.6775, "step": 4505, "task_loss": 0.721937358379364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6382407546043396, "epoch": 3.81, "learning_rate": 3.095519864750634e-05, "loss": 0.5624, "step": 4506, "task_loss": 1.164475679397583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7414417862892151, "epoch": 3.81, "learning_rate": 3.095097210481826e-05, "loss": 0.5192, "step": 4507, "task_loss": 1.4476611614227295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3845599889755249, "epoch": 3.81, "learning_rate": 3.094674556213018e-05, "loss": 0.5151, "step": 4508, "task_loss": 0.7954856157302856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1481575965881348, "epoch": 3.81, "learning_rate": 3.094251901944209e-05, "loss": 0.6428, "step": 4509, "task_loss": 0.6410477161407471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9641256332397461, "epoch": 3.81, "learning_rate": 3.093829247675402e-05, "loss": 0.6298, "step": 4510, "task_loss": 1.6068434715270996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0639525651931763, "epoch": 3.81, "learning_rate": 3.093406593406594e-05, "loss": 0.7856, "step": 4511, "task_loss": 0.5756399631500244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8429574966430664, "epoch": 3.81, "learning_rate": 3.092983939137785e-05, "loss": 0.5936, "step": 4512, "task_loss": 0.6623560190200806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34575480222702026, "epoch": 3.81, "learning_rate": 3.092561284868977e-05, "loss": 0.5662, "step": 4513, "task_loss": 0.52729332447052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5963305234909058, "epoch": 3.82, "learning_rate": 3.092138630600169e-05, "loss": 0.6183, "step": 4514, "task_loss": 0.3971259593963623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.475811630487442, "epoch": 3.82, "learning_rate": 3.091715976331361e-05, "loss": 0.5257, "step": 4515, "task_loss": 0.9267137050628662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4244786202907562, "epoch": 3.82, "learning_rate": 3.091293322062553e-05, "loss": 0.4573, "step": 4516, "task_loss": 0.19998353719711304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5385617017745972, "epoch": 3.82, "learning_rate": 3.090870667793745e-05, "loss": 0.6072, "step": 4517, "task_loss": 1.0044212341308594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3737107217311859, "epoch": 3.82, "learning_rate": 3.090448013524937e-05, "loss": 0.5639, "step": 4518, "task_loss": 0.7181384563446045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4733601212501526, "epoch": 3.82, "learning_rate": 3.090025359256128e-05, "loss": 0.5285, "step": 4519, "task_loss": 0.630943775177002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38003605604171753, "epoch": 3.82, "learning_rate": 3.08960270498732e-05, "loss": 0.3895, "step": 4520, "task_loss": 0.33974841237068176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3528686761856079, "epoch": 3.82, "learning_rate": 3.089180050718513e-05, "loss": 0.5676, "step": 4521, "task_loss": 1.288833498954773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0590084791183472, "epoch": 3.82, "learning_rate": 3.088757396449704e-05, "loss": 0.6612, "step": 4522, "task_loss": 0.3500828146934509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8006274700164795, "epoch": 3.82, "learning_rate": 3.088334742180896e-05, "loss": 0.8013, "step": 4523, "task_loss": 0.4747190773487091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6032232046127319, "epoch": 3.82, "learning_rate": 3.087912087912088e-05, "loss": 0.5985, "step": 4524, "task_loss": 0.9979947805404663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7694937586784363, "epoch": 3.82, "learning_rate": 3.0874894336432794e-05, "loss": 0.6899, "step": 4525, "task_loss": 0.5824013352394104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6039519906044006, "epoch": 3.83, "learning_rate": 3.0870667793744714e-05, "loss": 0.7181, "step": 4526, "task_loss": 0.6761059761047363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46193987131118774, "epoch": 3.83, "learning_rate": 3.086644125105664e-05, "loss": 0.6042, "step": 4527, "task_loss": 0.5998802781105042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8607094883918762, "epoch": 3.83, "learning_rate": 3.086221470836856e-05, "loss": 0.7503, "step": 4528, "task_loss": 1.418262004852295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6125825047492981, "epoch": 3.83, "learning_rate": 3.0857988165680473e-05, "loss": 0.5619, "step": 4529, "task_loss": 0.31533950567245483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6773574352264404, "epoch": 3.83, "learning_rate": 3.085376162299239e-05, "loss": 0.7213, "step": 4530, "task_loss": 0.7295495867729187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6777825355529785, "epoch": 3.83, "learning_rate": 3.084953508030431e-05, "loss": 0.8211, "step": 4531, "task_loss": 1.4275426864624023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44163185358047485, "epoch": 3.83, "learning_rate": 3.084530853761623e-05, "loss": 0.5865, "step": 4532, "task_loss": 0.28577885031700134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8128281831741333, "epoch": 3.83, "learning_rate": 3.084108199492815e-05, "loss": 0.609, "step": 4533, "task_loss": 0.7846980690956116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5423414707183838, "epoch": 3.83, "learning_rate": 3.083685545224007e-05, "loss": 0.517, "step": 4534, "task_loss": 0.36103442311286926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7944339513778687, "epoch": 3.83, "learning_rate": 3.0832628909551985e-05, "loss": 0.7502, "step": 4535, "task_loss": 2.01533579826355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5221424102783203, "epoch": 3.83, "learning_rate": 3.0828402366863905e-05, "loss": 0.4301, "step": 4536, "task_loss": 0.3656233549118042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5722428560256958, "epoch": 3.83, "learning_rate": 3.0824175824175825e-05, "loss": 0.5431, "step": 4537, "task_loss": 0.870442271232605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6565269827842712, "epoch": 3.84, "learning_rate": 3.0819949281487744e-05, "loss": 0.6714, "step": 4538, "task_loss": 0.4387611150741577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8450748920440674, "epoch": 3.84, "learning_rate": 3.0815722738799664e-05, "loss": 0.6987, "step": 4539, "task_loss": 1.4304630756378174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5021947622299194, "epoch": 3.84, "learning_rate": 3.0811496196111584e-05, "loss": 0.6695, "step": 4540, "task_loss": 0.7394357919692993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4328250586986542, "epoch": 3.84, "learning_rate": 3.08072696534235e-05, "loss": 0.4396, "step": 4541, "task_loss": 0.6554301977157593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43889665603637695, "epoch": 3.84, "learning_rate": 3.0803043110735416e-05, "loss": 0.5739, "step": 4542, "task_loss": 0.18864817917346954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6976079940795898, "epoch": 3.84, "learning_rate": 3.0798816568047336e-05, "loss": 0.5165, "step": 4543, "task_loss": 1.2544262409210205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35558581352233887, "epoch": 3.84, "learning_rate": 3.079459002535926e-05, "loss": 0.5898, "step": 4544, "task_loss": 0.49586665630340576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5743883848190308, "epoch": 3.84, "learning_rate": 3.0790363482671176e-05, "loss": 0.7453, "step": 4545, "task_loss": 0.12546835839748383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6252610683441162, "epoch": 3.84, "learning_rate": 3.0786136939983095e-05, "loss": 0.5556, "step": 4546, "task_loss": 1.0304925441741943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.60959792137146, "epoch": 3.84, "learning_rate": 3.0781910397295015e-05, "loss": 0.5557, "step": 4547, "task_loss": 0.8270279169082642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6361767649650574, "epoch": 3.84, "learning_rate": 3.077768385460693e-05, "loss": 0.5612, "step": 4548, "task_loss": 1.0571876764297485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4474867582321167, "epoch": 3.84, "learning_rate": 3.0773457311918855e-05, "loss": 0.6831, "step": 4549, "task_loss": 1.0027767419815063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7085468769073486, "epoch": 3.85, "learning_rate": 3.0769230769230774e-05, "loss": 0.8437, "step": 4550, "task_loss": 0.6151509284973145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3118171691894531, "epoch": 3.85, "learning_rate": 3.076500422654269e-05, "loss": 0.5076, "step": 4551, "task_loss": 0.4880085289478302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42318493127822876, "epoch": 3.85, "learning_rate": 3.076077768385461e-05, "loss": 0.5083, "step": 4552, "task_loss": 0.7853288054466248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4406645596027374, "epoch": 3.85, "learning_rate": 3.075655114116653e-05, "loss": 0.6061, "step": 4553, "task_loss": 1.2846165895462036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46522390842437744, "epoch": 3.85, "learning_rate": 3.0752324598478447e-05, "loss": 0.5885, "step": 4554, "task_loss": 0.6114926338195801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6775391697883606, "epoch": 3.85, "learning_rate": 3.0748098055790366e-05, "loss": 0.7544, "step": 4555, "task_loss": 1.5634747743606567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3962712287902832, "epoch": 3.85, "learning_rate": 3.0743871513102286e-05, "loss": 0.4072, "step": 4556, "task_loss": 0.27858734130859375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6714860200881958, "epoch": 3.85, "learning_rate": 3.0739644970414206e-05, "loss": 0.4693, "step": 4557, "task_loss": 0.5121512413024902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3502795696258545, "epoch": 3.85, "learning_rate": 3.073541842772612e-05, "loss": 0.5174, "step": 4558, "task_loss": 0.18655872344970703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45405086874961853, "epoch": 3.85, "learning_rate": 3.073119188503804e-05, "loss": 0.5134, "step": 4559, "task_loss": 0.9049712419509888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6238760352134705, "epoch": 3.85, "learning_rate": 3.072696534234996e-05, "loss": 0.6217, "step": 4560, "task_loss": 1.1351268291473389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4105741083621979, "epoch": 3.85, "learning_rate": 3.072273879966188e-05, "loss": 0.4496, "step": 4561, "task_loss": 0.3665374517440796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8665479421615601, "epoch": 3.86, "learning_rate": 3.07185122569738e-05, "loss": 0.5432, "step": 4562, "task_loss": 0.38219016790390015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46830126643180847, "epoch": 3.86, "learning_rate": 3.071428571428572e-05, "loss": 0.6991, "step": 4563, "task_loss": 0.45985695719718933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6550844311714172, "epoch": 3.86, "learning_rate": 3.071005917159763e-05, "loss": 0.4581, "step": 4564, "task_loss": 0.3388121426105499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21673999726772308, "epoch": 3.86, "learning_rate": 3.070583262890955e-05, "loss": 0.4623, "step": 4565, "task_loss": 0.05698661878705025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6762908697128296, "epoch": 3.86, "learning_rate": 3.070160608622148e-05, "loss": 0.5875, "step": 4566, "task_loss": 0.6332026124000549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7190256118774414, "epoch": 3.86, "learning_rate": 3.069737954353339e-05, "loss": 0.6256, "step": 4567, "task_loss": 0.641409158706665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5931517481803894, "epoch": 3.86, "learning_rate": 3.069315300084531e-05, "loss": 0.6252, "step": 4568, "task_loss": 0.719984233379364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17785121500492096, "epoch": 3.86, "learning_rate": 3.068892645815723e-05, "loss": 0.5192, "step": 4569, "task_loss": 0.07080139964818954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6851695775985718, "epoch": 3.86, "learning_rate": 3.068469991546914e-05, "loss": 0.722, "step": 4570, "task_loss": 0.7153067588806152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5196282863616943, "epoch": 3.86, "learning_rate": 3.068047337278107e-05, "loss": 0.5594, "step": 4571, "task_loss": 0.9176000356674194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28804445266723633, "epoch": 3.86, "learning_rate": 3.067624683009299e-05, "loss": 0.5854, "step": 4572, "task_loss": 0.4271126389503479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7349874377250671, "epoch": 3.87, "learning_rate": 3.067202028740491e-05, "loss": 0.5965, "step": 4573, "task_loss": 1.055741548538208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37242454290390015, "epoch": 3.87, "learning_rate": 3.066779374471682e-05, "loss": 0.5665, "step": 4574, "task_loss": 0.903777003288269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4188491106033325, "epoch": 3.87, "learning_rate": 3.066356720202874e-05, "loss": 0.6317, "step": 4575, "task_loss": 0.5148768424987793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37328606843948364, "epoch": 3.87, "learning_rate": 3.065934065934066e-05, "loss": 0.4836, "step": 4576, "task_loss": 0.6054843664169312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9590966701507568, "epoch": 3.87, "learning_rate": 3.065511411665258e-05, "loss": 0.6847, "step": 4577, "task_loss": 1.2013647556304932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6242749691009521, "epoch": 3.87, "learning_rate": 3.06508875739645e-05, "loss": 0.6302, "step": 4578, "task_loss": 0.7615480422973633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8141443729400635, "epoch": 3.87, "learning_rate": 3.064666103127642e-05, "loss": 0.9135, "step": 4579, "task_loss": 1.2536354064941406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45828643441200256, "epoch": 3.87, "learning_rate": 3.064243448858833e-05, "loss": 0.6748, "step": 4580, "task_loss": 0.5116100311279297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6139363646507263, "epoch": 3.87, "learning_rate": 3.063820794590025e-05, "loss": 0.5701, "step": 4581, "task_loss": 0.2785990536212921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4159098267555237, "epoch": 3.87, "learning_rate": 3.063398140321217e-05, "loss": 0.6065, "step": 4582, "task_loss": 0.14238207042217255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5898284912109375, "epoch": 3.87, "learning_rate": 3.062975486052409e-05, "loss": 0.5293, "step": 4583, "task_loss": 0.5675135254859924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6794606447219849, "epoch": 3.87, "learning_rate": 3.062552831783601e-05, "loss": 0.6289, "step": 4584, "task_loss": 0.6555403470993042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.518599271774292, "epoch": 3.88, "learning_rate": 3.062130177514793e-05, "loss": 0.6053, "step": 4585, "task_loss": 0.9533485770225525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1301783323287964, "epoch": 3.88, "learning_rate": 3.061707523245985e-05, "loss": 0.6882, "step": 4586, "task_loss": 0.8909433484077454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.387397825717926, "epoch": 3.88, "learning_rate": 3.0612848689771764e-05, "loss": 0.6066, "step": 4587, "task_loss": 0.02731383591890335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6907158493995667, "epoch": 3.88, "learning_rate": 3.060862214708369e-05, "loss": 0.5385, "step": 4588, "task_loss": 0.4953126311302185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7738770842552185, "epoch": 3.88, "learning_rate": 3.060439560439561e-05, "loss": 0.6411, "step": 4589, "task_loss": 1.83912193775177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8962497711181641, "epoch": 3.88, "learning_rate": 3.060016906170752e-05, "loss": 0.7956, "step": 4590, "task_loss": 1.5886213779449463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5566709041595459, "epoch": 3.88, "learning_rate": 3.059594251901944e-05, "loss": 0.6519, "step": 4591, "task_loss": 1.3151274919509888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6951749324798584, "epoch": 3.88, "learning_rate": 3.059171597633136e-05, "loss": 0.6482, "step": 4592, "task_loss": 1.7493023872375488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.793886661529541, "epoch": 3.88, "learning_rate": 3.058748943364328e-05, "loss": 0.6073, "step": 4593, "task_loss": 1.2700213193893433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.537638783454895, "epoch": 3.88, "learning_rate": 3.05832628909552e-05, "loss": 0.6938, "step": 4594, "task_loss": 0.9946596622467041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3894442021846771, "epoch": 3.88, "learning_rate": 3.057903634826712e-05, "loss": 0.6339, "step": 4595, "task_loss": 0.8490023612976074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5279010534286499, "epoch": 3.88, "learning_rate": 3.0574809805579035e-05, "loss": 0.6679, "step": 4596, "task_loss": 1.493714451789856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8200480937957764, "epoch": 3.89, "learning_rate": 3.0570583262890955e-05, "loss": 0.5328, "step": 4597, "task_loss": 0.49615558981895447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6064015626907349, "epoch": 3.89, "learning_rate": 3.0566356720202874e-05, "loss": 0.5919, "step": 4598, "task_loss": 0.19688251614570618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8986175060272217, "epoch": 3.89, "learning_rate": 3.0562130177514794e-05, "loss": 0.5528, "step": 4599, "task_loss": 0.6584007143974304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45789384841918945, "epoch": 3.89, "learning_rate": 3.0557903634826714e-05, "loss": 0.4642, "step": 4600, "task_loss": 0.8988075256347656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6040282249450684, "epoch": 3.89, "learning_rate": 3.0553677092138634e-05, "loss": 0.4742, "step": 4601, "task_loss": 0.4458990693092346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5014411211013794, "epoch": 3.89, "learning_rate": 3.054945054945055e-05, "loss": 0.5847, "step": 4602, "task_loss": 0.13124622404575348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7907803058624268, "epoch": 3.89, "learning_rate": 3.0545224006762466e-05, "loss": 0.807, "step": 4603, "task_loss": 0.2885737717151642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6017202138900757, "epoch": 3.89, "learning_rate": 3.0540997464074386e-05, "loss": 0.5377, "step": 4604, "task_loss": 1.2401753664016724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9313364624977112, "epoch": 3.89, "learning_rate": 3.053677092138631e-05, "loss": 0.7706, "step": 4605, "task_loss": 1.7111760377883911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5182029008865356, "epoch": 3.89, "learning_rate": 3.0532544378698226e-05, "loss": 0.7943, "step": 4606, "task_loss": 0.41629770398139954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4889213442802429, "epoch": 3.89, "learning_rate": 3.0528317836010145e-05, "loss": 0.472, "step": 4607, "task_loss": 0.4720189571380615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5702635049819946, "epoch": 3.89, "learning_rate": 3.0524091293322065e-05, "loss": 0.5743, "step": 4608, "task_loss": 0.4291757643222809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4841800034046173, "epoch": 3.9, "learning_rate": 3.051986475063398e-05, "loss": 0.5174, "step": 4609, "task_loss": 0.578289270401001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4954565763473511, "epoch": 3.9, "learning_rate": 3.05156382079459e-05, "loss": 0.5167, "step": 4610, "task_loss": 0.7248358130455017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6871786117553711, "epoch": 3.9, "learning_rate": 3.051141166525782e-05, "loss": 0.5318, "step": 4611, "task_loss": 0.7253604531288147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2320185899734497, "epoch": 3.9, "learning_rate": 3.0507185122569737e-05, "loss": 0.7797, "step": 4612, "task_loss": 1.1038596630096436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5151013731956482, "epoch": 3.9, "learning_rate": 3.0502958579881657e-05, "loss": 0.5489, "step": 4613, "task_loss": 0.7845394611358643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5132116079330444, "epoch": 3.9, "learning_rate": 3.0498732037193577e-05, "loss": 0.6172, "step": 4614, "task_loss": 0.6595526933670044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7027263045310974, "epoch": 3.9, "learning_rate": 3.04945054945055e-05, "loss": 0.5035, "step": 4615, "task_loss": 0.6024203300476074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4905238747596741, "epoch": 3.9, "learning_rate": 3.0490278951817413e-05, "loss": 0.6528, "step": 4616, "task_loss": 0.550947368144989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47260594367980957, "epoch": 3.9, "learning_rate": 3.0486052409129336e-05, "loss": 0.6501, "step": 4617, "task_loss": 1.2506953477859497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4075651168823242, "epoch": 3.9, "learning_rate": 3.0481825866441256e-05, "loss": 0.6616, "step": 4618, "task_loss": 0.8619855642318726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32177862524986267, "epoch": 3.9, "learning_rate": 3.047759932375317e-05, "loss": 0.6607, "step": 4619, "task_loss": 0.8519116640090942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5030864477157593, "epoch": 3.9, "learning_rate": 3.047337278106509e-05, "loss": 0.6319, "step": 4620, "task_loss": 1.0712083578109741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6509045362472534, "epoch": 3.91, "learning_rate": 3.046914623837701e-05, "loss": 0.6804, "step": 4621, "task_loss": 0.2627386748790741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5919709205627441, "epoch": 3.91, "learning_rate": 3.0464919695688924e-05, "loss": 0.6214, "step": 4622, "task_loss": 0.5721436142921448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7036287784576416, "epoch": 3.91, "learning_rate": 3.0460693153000848e-05, "loss": 0.7923, "step": 4623, "task_loss": 1.5684539079666138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9535712599754333, "epoch": 3.91, "learning_rate": 3.0456466610312767e-05, "loss": 0.6906, "step": 4624, "task_loss": 0.9885371327400208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6447042226791382, "epoch": 3.91, "learning_rate": 3.0452240067624684e-05, "loss": 0.7969, "step": 4625, "task_loss": 1.0202895402908325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3470914959907532, "epoch": 3.91, "learning_rate": 3.0448013524936603e-05, "loss": 0.4902, "step": 4626, "task_loss": 0.5662506818771362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8621094226837158, "epoch": 3.91, "learning_rate": 3.0443786982248523e-05, "loss": 0.7771, "step": 4627, "task_loss": 0.8232499361038208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6573700904846191, "epoch": 3.91, "learning_rate": 3.043956043956044e-05, "loss": 0.5985, "step": 4628, "task_loss": 1.3819470405578613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4911420941352844, "epoch": 3.91, "learning_rate": 3.043533389687236e-05, "loss": 0.5981, "step": 4629, "task_loss": 1.6177477836608887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5307059288024902, "epoch": 3.91, "learning_rate": 3.043110735418428e-05, "loss": 0.5653, "step": 4630, "task_loss": 0.9362890720367432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3964237570762634, "epoch": 3.91, "learning_rate": 3.04268808114962e-05, "loss": 0.6331, "step": 4631, "task_loss": 0.5317371487617493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7293582558631897, "epoch": 3.91, "learning_rate": 3.0422654268808115e-05, "loss": 0.6117, "step": 4632, "task_loss": 1.1754529476165771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5061864256858826, "epoch": 3.92, "learning_rate": 3.0418427726120035e-05, "loss": 0.5866, "step": 4633, "task_loss": 0.6947102546691895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39274168014526367, "epoch": 3.92, "learning_rate": 3.0414201183431958e-05, "loss": 0.5249, "step": 4634, "task_loss": 0.17787891626358032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4959263205528259, "epoch": 3.92, "learning_rate": 3.040997464074387e-05, "loss": 0.6468, "step": 4635, "task_loss": 1.0032756328582764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5696570873260498, "epoch": 3.92, "learning_rate": 3.040574809805579e-05, "loss": 0.5324, "step": 4636, "task_loss": 0.22751933336257935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8945135474205017, "epoch": 3.92, "learning_rate": 3.0401521555367714e-05, "loss": 0.7893, "step": 4637, "task_loss": 1.4217852354049683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5857203006744385, "epoch": 3.92, "learning_rate": 3.0397295012679627e-05, "loss": 0.5636, "step": 4638, "task_loss": 0.2735403776168823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3120154142379761, "epoch": 3.92, "learning_rate": 3.0393068469991546e-05, "loss": 0.6174, "step": 4639, "task_loss": 0.11848022043704987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3180572986602783, "epoch": 3.92, "learning_rate": 3.038884192730347e-05, "loss": 0.3543, "step": 4640, "task_loss": 0.5309938192367554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45571303367614746, "epoch": 3.92, "learning_rate": 3.0384615384615382e-05, "loss": 0.4835, "step": 4641, "task_loss": 0.3462219536304474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36800670623779297, "epoch": 3.92, "learning_rate": 3.0380388841927306e-05, "loss": 0.612, "step": 4642, "task_loss": 0.04561953619122505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1671844720840454, "epoch": 3.92, "learning_rate": 3.0376162299239225e-05, "loss": 0.7772, "step": 4643, "task_loss": 0.48168498277664185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47081223130226135, "epoch": 3.93, "learning_rate": 3.0371935756551145e-05, "loss": 0.4401, "step": 4644, "task_loss": 0.33101022243499756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2047979831695557, "epoch": 3.93, "learning_rate": 3.036770921386306e-05, "loss": 0.7449, "step": 4645, "task_loss": 0.6459980010986328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3985995054244995, "epoch": 3.93, "learning_rate": 3.036348267117498e-05, "loss": 0.6065, "step": 4646, "task_loss": 0.14967221021652222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5883121490478516, "epoch": 3.93, "learning_rate": 3.03592561284869e-05, "loss": 0.7161, "step": 4647, "task_loss": 1.446795105934143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6932694911956787, "epoch": 3.93, "learning_rate": 3.0355029585798817e-05, "loss": 0.6715, "step": 4648, "task_loss": 1.0540391206741333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.442544162273407, "epoch": 3.93, "learning_rate": 3.0350803043110737e-05, "loss": 0.5877, "step": 4649, "task_loss": 0.2704816460609436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.484758198261261, "epoch": 3.93, "learning_rate": 3.0346576500422657e-05, "loss": 0.5702, "step": 4650, "task_loss": 0.31382569670677185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6236105561256409, "epoch": 3.93, "learning_rate": 3.0342349957734573e-05, "loss": 0.6859, "step": 4651, "task_loss": 2.421828269958496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7948158979415894, "epoch": 3.93, "learning_rate": 3.0338123415046493e-05, "loss": 0.6787, "step": 4652, "task_loss": 0.23259636759757996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4680336117744446, "epoch": 3.93, "learning_rate": 3.0333896872358413e-05, "loss": 0.5938, "step": 4653, "task_loss": 1.1514549255371094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.701113760471344, "epoch": 3.93, "learning_rate": 3.032967032967033e-05, "loss": 0.6772, "step": 4654, "task_loss": 0.4720182716846466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8718936443328857, "epoch": 3.93, "learning_rate": 3.032544378698225e-05, "loss": 0.5461, "step": 4655, "task_loss": 0.4147320091724396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5229637026786804, "epoch": 3.94, "learning_rate": 3.032121724429417e-05, "loss": 0.4618, "step": 4656, "task_loss": 0.7907066345214844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47369498014450073, "epoch": 3.94, "learning_rate": 3.0316990701606085e-05, "loss": 0.4892, "step": 4657, "task_loss": 0.17236801981925964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5330003499984741, "epoch": 3.94, "learning_rate": 3.0312764158918004e-05, "loss": 0.577, "step": 4658, "task_loss": 1.0430312156677246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6459700465202332, "epoch": 3.94, "learning_rate": 3.0308537616229928e-05, "loss": 0.4586, "step": 4659, "task_loss": 0.35774412751197815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7663490176200867, "epoch": 3.94, "learning_rate": 3.0304311073541847e-05, "loss": 0.7398, "step": 4660, "task_loss": 1.2521777153015137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48543113470077515, "epoch": 3.94, "learning_rate": 3.030008453085376e-05, "loss": 0.5859, "step": 4661, "task_loss": 1.53238046169281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6202825307846069, "epoch": 3.94, "learning_rate": 3.0295857988165683e-05, "loss": 0.5863, "step": 4662, "task_loss": 0.1928168684244156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4644743800163269, "epoch": 3.94, "learning_rate": 3.0291631445477603e-05, "loss": 0.5111, "step": 4663, "task_loss": 0.37532007694244385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4748108386993408, "epoch": 3.94, "learning_rate": 3.028740490278952e-05, "loss": 0.5409, "step": 4664, "task_loss": 0.2173890322446823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5967252254486084, "epoch": 3.94, "learning_rate": 3.028317836010144e-05, "loss": 0.5199, "step": 4665, "task_loss": 1.372541069984436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48646456003189087, "epoch": 3.94, "learning_rate": 3.027895181741336e-05, "loss": 0.5493, "step": 4666, "task_loss": 0.4623583257198334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6286332607269287, "epoch": 3.94, "learning_rate": 3.0274725274725275e-05, "loss": 0.634, "step": 4667, "task_loss": 1.0798989534378052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4886741638183594, "epoch": 3.95, "learning_rate": 3.0270498732037195e-05, "loss": 0.5292, "step": 4668, "task_loss": 0.8430002331733704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7612294554710388, "epoch": 3.95, "learning_rate": 3.0266272189349115e-05, "loss": 0.7897, "step": 4669, "task_loss": 1.52606201171875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34037041664123535, "epoch": 3.95, "learning_rate": 3.026204564666103e-05, "loss": 0.5062, "step": 4670, "task_loss": 0.47254952788352966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.562741756439209, "epoch": 3.95, "learning_rate": 3.025781910397295e-05, "loss": 0.4963, "step": 4671, "task_loss": 0.7736961841583252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27162396907806396, "epoch": 3.95, "learning_rate": 3.025359256128487e-05, "loss": 0.5941, "step": 4672, "task_loss": 0.8885185718536377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6105550527572632, "epoch": 3.95, "learning_rate": 3.024936601859679e-05, "loss": 0.6522, "step": 4673, "task_loss": 0.6301946640014648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6110946536064148, "epoch": 3.95, "learning_rate": 3.0245139475908707e-05, "loss": 0.7506, "step": 4674, "task_loss": 0.6913084983825684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7323060035705566, "epoch": 3.95, "learning_rate": 3.0240912933220626e-05, "loss": 0.8128, "step": 4675, "task_loss": 0.5939364433288574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3728654384613037, "epoch": 3.95, "learning_rate": 3.023668639053255e-05, "loss": 0.6279, "step": 4676, "task_loss": 0.8307532072067261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7955504059791565, "epoch": 3.95, "learning_rate": 3.0232459847844463e-05, "loss": 0.4841, "step": 4677, "task_loss": 0.854138970375061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42262211441993713, "epoch": 3.95, "learning_rate": 3.0228233305156382e-05, "loss": 0.3941, "step": 4678, "task_loss": 1.0354888439178467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5925530195236206, "epoch": 3.95, "learning_rate": 3.0224006762468305e-05, "loss": 0.5808, "step": 4679, "task_loss": 0.7890202403068542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7886413335800171, "epoch": 3.96, "learning_rate": 3.021978021978022e-05, "loss": 0.5881, "step": 4680, "task_loss": 1.035560131072998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5514804720878601, "epoch": 3.96, "learning_rate": 3.021555367709214e-05, "loss": 0.7138, "step": 4681, "task_loss": 0.2693343460559845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5306179523468018, "epoch": 3.96, "learning_rate": 3.021132713440406e-05, "loss": 0.6814, "step": 4682, "task_loss": 0.6615212559700012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5154081583023071, "epoch": 3.96, "learning_rate": 3.0207100591715974e-05, "loss": 0.5461, "step": 4683, "task_loss": 0.5905464887619019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7044644951820374, "epoch": 3.96, "learning_rate": 3.0202874049027897e-05, "loss": 0.69, "step": 4684, "task_loss": 1.3696662187576294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3044279217720032, "epoch": 3.96, "learning_rate": 3.0198647506339817e-05, "loss": 0.4241, "step": 4685, "task_loss": 0.5317029356956482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9315177798271179, "epoch": 3.96, "learning_rate": 3.019442096365173e-05, "loss": 0.7594, "step": 4686, "task_loss": 0.9961008429527283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48297902941703796, "epoch": 3.96, "learning_rate": 3.0190194420963653e-05, "loss": 0.4391, "step": 4687, "task_loss": 1.5042399168014526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7176103591918945, "epoch": 3.96, "learning_rate": 3.0185967878275573e-05, "loss": 0.6331, "step": 4688, "task_loss": 0.3522810935974121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5474249124526978, "epoch": 3.96, "learning_rate": 3.0181741335587493e-05, "loss": 0.5534, "step": 4689, "task_loss": 0.903675377368927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.578201174736023, "epoch": 3.96, "learning_rate": 3.017751479289941e-05, "loss": 0.5727, "step": 4690, "task_loss": 0.34195613861083984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6848090887069702, "epoch": 3.96, "learning_rate": 3.017328825021133e-05, "loss": 0.6145, "step": 4691, "task_loss": 1.541022539138794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8923132419586182, "epoch": 3.97, "learning_rate": 3.016906170752325e-05, "loss": 0.6219, "step": 4692, "task_loss": 0.8312207460403442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6767929196357727, "epoch": 3.97, "learning_rate": 3.0164835164835165e-05, "loss": 0.7494, "step": 4693, "task_loss": 1.2637939453125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3764417767524719, "epoch": 3.97, "learning_rate": 3.0160608622147085e-05, "loss": 0.503, "step": 4694, "task_loss": 0.7010582685470581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4886600375175476, "epoch": 3.97, "learning_rate": 3.0156382079459004e-05, "loss": 0.5096, "step": 4695, "task_loss": 0.35994237661361694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5162187814712524, "epoch": 3.97, "learning_rate": 3.015215553677092e-05, "loss": 0.5042, "step": 4696, "task_loss": 0.9106240272521973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3719951808452606, "epoch": 3.97, "learning_rate": 3.014792899408284e-05, "loss": 0.4696, "step": 4697, "task_loss": 0.425300270318985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5133061408996582, "epoch": 3.97, "learning_rate": 3.0143702451394764e-05, "loss": 0.4871, "step": 4698, "task_loss": 1.1612643003463745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5151873826980591, "epoch": 3.97, "learning_rate": 3.0139475908706676e-05, "loss": 0.5259, "step": 4699, "task_loss": 0.6632486581802368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26280128955841064, "epoch": 3.97, "learning_rate": 3.0135249366018596e-05, "loss": 0.4463, "step": 4700, "task_loss": 0.6082665920257568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47703397274017334, "epoch": 3.97, "learning_rate": 3.013102282333052e-05, "loss": 0.6013, "step": 4701, "task_loss": 0.7142811417579651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7605900764465332, "epoch": 3.97, "learning_rate": 3.012679628064244e-05, "loss": 0.6102, "step": 4702, "task_loss": 0.35928454995155334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8163631558418274, "epoch": 3.97, "learning_rate": 3.0122569737954352e-05, "loss": 0.736, "step": 4703, "task_loss": 0.5575838088989258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5625446438789368, "epoch": 3.98, "learning_rate": 3.0118343195266275e-05, "loss": 0.7405, "step": 4704, "task_loss": 0.39987924695014954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5083070397377014, "epoch": 3.98, "learning_rate": 3.0114116652578195e-05, "loss": 0.6836, "step": 4705, "task_loss": 0.6831318140029907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6689057946205139, "epoch": 3.98, "learning_rate": 3.010989010989011e-05, "loss": 0.6175, "step": 4706, "task_loss": 0.9765880703926086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.788018524646759, "epoch": 3.98, "learning_rate": 3.010566356720203e-05, "loss": 0.6306, "step": 4707, "task_loss": 0.49589768052101135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.613419771194458, "epoch": 3.98, "learning_rate": 3.010143702451395e-05, "loss": 0.6317, "step": 4708, "task_loss": 0.5823269486427307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.561852216720581, "epoch": 3.98, "learning_rate": 3.0097210481825867e-05, "loss": 0.6183, "step": 4709, "task_loss": 0.5153210163116455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6073218584060669, "epoch": 3.98, "learning_rate": 3.0092983939137787e-05, "loss": 0.7494, "step": 4710, "task_loss": 0.6586194038391113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27364081144332886, "epoch": 3.98, "learning_rate": 3.0088757396449707e-05, "loss": 0.5944, "step": 4711, "task_loss": 0.3483925461769104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5581334829330444, "epoch": 3.98, "learning_rate": 3.0084530853761623e-05, "loss": 0.6322, "step": 4712, "task_loss": 1.072516918182373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3722652792930603, "epoch": 3.98, "learning_rate": 3.0080304311073543e-05, "loss": 0.4809, "step": 4713, "task_loss": 0.7023463249206543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6974066495895386, "epoch": 3.98, "learning_rate": 3.0076077768385462e-05, "loss": 0.8307, "step": 4714, "task_loss": 0.6060879826545715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45256704092025757, "epoch": 3.99, "learning_rate": 3.007185122569738e-05, "loss": 0.5379, "step": 4715, "task_loss": 0.5103746652603149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40717703104019165, "epoch": 3.99, "learning_rate": 3.00676246830093e-05, "loss": 0.6681, "step": 4716, "task_loss": 0.19918246567249298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5772171020507812, "epoch": 3.99, "learning_rate": 3.0063398140321218e-05, "loss": 0.5883, "step": 4717, "task_loss": 0.4013548493385315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5430459380149841, "epoch": 3.99, "learning_rate": 3.005917159763314e-05, "loss": 0.5283, "step": 4718, "task_loss": 0.3575628995895386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33911755681037903, "epoch": 3.99, "learning_rate": 3.0054945054945054e-05, "loss": 0.6451, "step": 4719, "task_loss": 0.2961440980434418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5955850481987, "epoch": 3.99, "learning_rate": 3.0050718512256974e-05, "loss": 0.5115, "step": 4720, "task_loss": 0.9544125199317932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40576303005218506, "epoch": 3.99, "learning_rate": 3.0046491969568897e-05, "loss": 0.4771, "step": 4721, "task_loss": 0.7533657550811768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9449419975280762, "epoch": 3.99, "learning_rate": 3.004226542688081e-05, "loss": 0.6291, "step": 4722, "task_loss": 1.2539595365524292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2913803160190582, "epoch": 3.99, "learning_rate": 3.0038038884192733e-05, "loss": 0.5302, "step": 4723, "task_loss": 0.43162453174591064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36746746301651, "epoch": 3.99, "learning_rate": 3.0033812341504653e-05, "loss": 0.5239, "step": 4724, "task_loss": 0.5555220246315002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7682191133499146, "epoch": 3.99, "learning_rate": 3.0029585798816566e-05, "loss": 0.6528, "step": 4725, "task_loss": 0.3656502068042755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5036412477493286, "epoch": 3.99, "learning_rate": 3.002535925612849e-05, "loss": 0.4916, "step": 4726, "task_loss": 0.2712375819683075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9559680223464966, "epoch": 4.0, "learning_rate": 3.002113271344041e-05, "loss": 0.6769, "step": 4727, "task_loss": 1.4650204181671143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6178359985351562, "epoch": 4.0, "learning_rate": 3.0016906170752325e-05, "loss": 0.5574, "step": 4728, "task_loss": 1.5286884307861328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19341501593589783, "epoch": 4.0, "learning_rate": 3.0012679628064245e-05, "loss": 0.3833, "step": 4729, "task_loss": 0.07825923711061478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7431408166885376, "epoch": 4.0, "learning_rate": 3.0008453085376165e-05, "loss": 0.8137, "step": 4730, "task_loss": 1.092724323272705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8021806478500366, "epoch": 4.0, "learning_rate": 3.0004226542688084e-05, "loss": 0.5668, "step": 4731, "task_loss": 1.0776853561401367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5641326904296875, "epoch": 4.0, "learning_rate": 3e-05, "loss": 0.6357, "step": 4732, "task_loss": 0.3521232008934021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39596685767173767, "epoch": 4.0, "learning_rate": 2.999577345731192e-05, "loss": 0.7313, "step": 4733, "task_loss": 0.8014976978302002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7462252974510193, "epoch": 4.0, "learning_rate": 2.999154691462384e-05, "loss": 0.691, "step": 4734, "task_loss": 1.1207897663116455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4042583703994751, "epoch": 4.0, "learning_rate": 2.9987320371935757e-05, "loss": 0.5421, "step": 4735, "task_loss": 1.1607073545455933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6367651224136353, "epoch": 4.0, "learning_rate": 2.9983093829247676e-05, "loss": 0.5674, "step": 4736, "task_loss": 1.2157838344573975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4351539611816406, "epoch": 4.0, "learning_rate": 2.9978867286559596e-05, "loss": 0.5, "step": 4737, "task_loss": 0.20542965829372406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5558850169181824, "epoch": 4.01, "learning_rate": 2.9974640743871512e-05, "loss": 0.4686, "step": 4738, "task_loss": 0.48375123739242554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7042853236198425, "epoch": 4.01, "learning_rate": 2.9970414201183432e-05, "loss": 0.6005, "step": 4739, "task_loss": 0.40970611572265625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34301328659057617, "epoch": 4.01, "learning_rate": 2.9966187658495355e-05, "loss": 0.5167, "step": 4740, "task_loss": 0.31376785039901733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4341970682144165, "epoch": 4.01, "learning_rate": 2.9961961115807268e-05, "loss": 0.5621, "step": 4741, "task_loss": 0.2768462896347046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8227153420448303, "epoch": 4.01, "learning_rate": 2.9957734573119188e-05, "loss": 0.6527, "step": 4742, "task_loss": 0.26090165972709656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28555893898010254, "epoch": 4.01, "learning_rate": 2.995350803043111e-05, "loss": 0.5548, "step": 4743, "task_loss": 0.05753535404801369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41741886734962463, "epoch": 4.01, "learning_rate": 2.9949281487743024e-05, "loss": 0.5999, "step": 4744, "task_loss": 0.6317171454429626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45832276344299316, "epoch": 4.01, "learning_rate": 2.9945054945054947e-05, "loss": 0.5088, "step": 4745, "task_loss": 0.4320179224014282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4288496971130371, "epoch": 4.01, "learning_rate": 2.9940828402366867e-05, "loss": 0.4188, "step": 4746, "task_loss": 0.2928406000137329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0759358406066895, "epoch": 4.01, "learning_rate": 2.9936601859678787e-05, "loss": 0.7903, "step": 4747, "task_loss": 0.31203359365463257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5567479133605957, "epoch": 4.01, "learning_rate": 2.9932375316990703e-05, "loss": 0.7488, "step": 4748, "task_loss": 0.7170420289039612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0591440200805664, "epoch": 4.01, "learning_rate": 2.9928148774302623e-05, "loss": 0.7233, "step": 4749, "task_loss": 0.6277914643287659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3386024832725525, "epoch": 4.02, "learning_rate": 2.9923922231614543e-05, "loss": 0.4745, "step": 4750, "task_loss": 0.5792732238769531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3787451684474945, "epoch": 4.02, "learning_rate": 2.991969568892646e-05, "loss": 0.4155, "step": 4751, "task_loss": 0.39815253019332886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6467211842536926, "epoch": 4.02, "learning_rate": 2.991546914623838e-05, "loss": 0.7457, "step": 4752, "task_loss": 0.872768223285675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5191845893859863, "epoch": 4.02, "learning_rate": 2.99112426035503e-05, "loss": 0.7095, "step": 4753, "task_loss": 0.8604825735092163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46322154998779297, "epoch": 4.02, "learning_rate": 2.9907016060862215e-05, "loss": 0.6925, "step": 4754, "task_loss": 0.13558031618595123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5248489379882812, "epoch": 4.02, "learning_rate": 2.9902789518174134e-05, "loss": 0.5462, "step": 4755, "task_loss": 0.6144562363624573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3355175256729126, "epoch": 4.02, "learning_rate": 2.9898562975486054e-05, "loss": 0.647, "step": 4756, "task_loss": 0.44851887226104736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6245940923690796, "epoch": 4.02, "learning_rate": 2.989433643279797e-05, "loss": 0.4949, "step": 4757, "task_loss": 0.978074848651886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38768285512924194, "epoch": 4.02, "learning_rate": 2.989010989010989e-05, "loss": 0.6135, "step": 4758, "task_loss": 0.23951290547847748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5809382200241089, "epoch": 4.02, "learning_rate": 2.988588334742181e-05, "loss": 0.5323, "step": 4759, "task_loss": 0.8166263699531555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3891773521900177, "epoch": 4.02, "learning_rate": 2.9881656804733733e-05, "loss": 0.4029, "step": 4760, "task_loss": 0.4305973947048187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0928038358688354, "epoch": 4.02, "learning_rate": 2.9877430262045646e-05, "loss": 0.651, "step": 4761, "task_loss": 1.4423768520355225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5874948501586914, "epoch": 4.03, "learning_rate": 2.987320371935757e-05, "loss": 0.6702, "step": 4762, "task_loss": 0.8745338916778564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.682579755783081, "epoch": 4.03, "learning_rate": 2.986897717666949e-05, "loss": 0.5267, "step": 4763, "task_loss": 0.46137967705726624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5621275305747986, "epoch": 4.03, "learning_rate": 2.9864750633981402e-05, "loss": 0.5376, "step": 4764, "task_loss": 0.43116816878318787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5357831716537476, "epoch": 4.03, "learning_rate": 2.9860524091293325e-05, "loss": 0.6038, "step": 4765, "task_loss": 0.593547523021698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33915096521377563, "epoch": 4.03, "learning_rate": 2.9856297548605245e-05, "loss": 0.554, "step": 4766, "task_loss": 0.6328922510147095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4777359962463379, "epoch": 4.03, "learning_rate": 2.9852071005917158e-05, "loss": 0.4684, "step": 4767, "task_loss": 0.12932059168815613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4044278860092163, "epoch": 4.03, "learning_rate": 2.984784446322908e-05, "loss": 0.3792, "step": 4768, "task_loss": 0.45302632451057434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5154479742050171, "epoch": 4.03, "learning_rate": 2.9843617920541e-05, "loss": 0.367, "step": 4769, "task_loss": 0.4945833384990692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8677091002464294, "epoch": 4.03, "learning_rate": 2.9839391377852917e-05, "loss": 0.7079, "step": 4770, "task_loss": 0.4092583656311035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24866575002670288, "epoch": 4.03, "learning_rate": 2.9835164835164837e-05, "loss": 0.3416, "step": 4771, "task_loss": 0.21698196232318878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37771540880203247, "epoch": 4.03, "learning_rate": 2.9830938292476756e-05, "loss": 0.4638, "step": 4772, "task_loss": 0.4372885227203369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.278551459312439, "epoch": 4.03, "learning_rate": 2.9826711749788673e-05, "loss": 0.6601, "step": 4773, "task_loss": 1.5556211471557617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36623716354370117, "epoch": 4.04, "learning_rate": 2.9822485207100593e-05, "loss": 0.4309, "step": 4774, "task_loss": 0.3765921890735626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4066579043865204, "epoch": 4.04, "learning_rate": 2.9818258664412512e-05, "loss": 0.455, "step": 4775, "task_loss": 0.5585378408432007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4618692100048065, "epoch": 4.04, "learning_rate": 2.9814032121724432e-05, "loss": 0.5495, "step": 4776, "task_loss": 0.5024696588516235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3278048634529114, "epoch": 4.04, "learning_rate": 2.980980557903635e-05, "loss": 0.4986, "step": 4777, "task_loss": 1.075730800628662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5876538753509521, "epoch": 4.04, "learning_rate": 2.9805579036348268e-05, "loss": 0.6961, "step": 4778, "task_loss": 0.7248564958572388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6468080878257751, "epoch": 4.04, "learning_rate": 2.980135249366019e-05, "loss": 0.5761, "step": 4779, "task_loss": 1.5820600986480713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3380969166755676, "epoch": 4.04, "learning_rate": 2.9797125950972104e-05, "loss": 0.5324, "step": 4780, "task_loss": 0.9099416136741638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6064426898956299, "epoch": 4.04, "learning_rate": 2.9792899408284024e-05, "loss": 0.5628, "step": 4781, "task_loss": 0.8687354326248169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4320724606513977, "epoch": 4.04, "learning_rate": 2.9788672865595947e-05, "loss": 0.5117, "step": 4782, "task_loss": 0.48203763365745544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4345114529132843, "epoch": 4.04, "learning_rate": 2.978444632290786e-05, "loss": 0.3607, "step": 4783, "task_loss": 0.5369747877120972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40863925218582153, "epoch": 4.04, "learning_rate": 2.978021978021978e-05, "loss": 0.4975, "step": 4784, "task_loss": 0.716952919960022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.471333384513855, "epoch": 4.04, "learning_rate": 2.9775993237531703e-05, "loss": 0.6046, "step": 4785, "task_loss": 0.4160959720611572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5143387913703918, "epoch": 4.05, "learning_rate": 2.9771766694843616e-05, "loss": 0.5247, "step": 4786, "task_loss": 0.6719158291816711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3797294795513153, "epoch": 4.05, "learning_rate": 2.976754015215554e-05, "loss": 0.6387, "step": 4787, "task_loss": 0.6243707537651062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5103291273117065, "epoch": 4.05, "learning_rate": 2.976331360946746e-05, "loss": 0.4673, "step": 4788, "task_loss": 0.9570773839950562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5803094506263733, "epoch": 4.05, "learning_rate": 2.975908706677938e-05, "loss": 0.5807, "step": 4789, "task_loss": 0.5373027920722961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9603193998336792, "epoch": 4.05, "learning_rate": 2.9754860524091295e-05, "loss": 0.8264, "step": 4790, "task_loss": 0.5363649129867554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20841357111930847, "epoch": 4.05, "learning_rate": 2.9750633981403215e-05, "loss": 0.3852, "step": 4791, "task_loss": 0.09408742934465408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3148472309112549, "epoch": 4.05, "learning_rate": 2.9746407438715134e-05, "loss": 0.4742, "step": 4792, "task_loss": 0.8518476486206055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5975577235221863, "epoch": 4.05, "learning_rate": 2.974218089602705e-05, "loss": 0.519, "step": 4793, "task_loss": 0.551813542842865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3116340637207031, "epoch": 4.05, "learning_rate": 2.973795435333897e-05, "loss": 0.4622, "step": 4794, "task_loss": 0.10413404554128647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3479987680912018, "epoch": 4.05, "learning_rate": 2.973372781065089e-05, "loss": 0.4832, "step": 4795, "task_loss": 0.023804357275366783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8424704074859619, "epoch": 4.05, "learning_rate": 2.9729501267962806e-05, "loss": 0.5387, "step": 4796, "task_loss": 0.8505396842956543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.439983069896698, "epoch": 4.05, "learning_rate": 2.9725274725274726e-05, "loss": 0.4352, "step": 4797, "task_loss": 0.8187170624732971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35773593187332153, "epoch": 4.06, "learning_rate": 2.9721048182586646e-05, "loss": 0.4926, "step": 4798, "task_loss": 0.3739623427391052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42781004309654236, "epoch": 4.06, "learning_rate": 2.9716821639898562e-05, "loss": 0.444, "step": 4799, "task_loss": 0.641234278678894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7133920192718506, "epoch": 4.06, "learning_rate": 2.9712595097210482e-05, "loss": 0.5427, "step": 4800, "task_loss": 0.1334468424320221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5091303586959839, "epoch": 4.06, "learning_rate": 2.9708368554522402e-05, "loss": 0.5804, "step": 4801, "task_loss": 0.9751131534576416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26588380336761475, "epoch": 4.06, "learning_rate": 2.9704142011834318e-05, "loss": 0.5459, "step": 4802, "task_loss": 0.09333113580942154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37200456857681274, "epoch": 4.06, "learning_rate": 2.9699915469146238e-05, "loss": 0.5496, "step": 4803, "task_loss": 0.33158430457115173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4736981689929962, "epoch": 4.06, "learning_rate": 2.969568892645816e-05, "loss": 0.5455, "step": 4804, "task_loss": 1.27243971824646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20986230671405792, "epoch": 4.06, "learning_rate": 2.969146238377008e-05, "loss": 0.5106, "step": 4805, "task_loss": 0.2970461845397949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43755829334259033, "epoch": 4.06, "learning_rate": 2.9687235841081994e-05, "loss": 0.6134, "step": 4806, "task_loss": 0.5337417721748352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0892823934555054, "epoch": 4.06, "learning_rate": 2.9683009298393917e-05, "loss": 0.7501, "step": 4807, "task_loss": 0.5199514031410217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6703680753707886, "epoch": 4.06, "learning_rate": 2.9678782755705837e-05, "loss": 0.763, "step": 4808, "task_loss": 0.771691620349884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8760867118835449, "epoch": 4.07, "learning_rate": 2.9674556213017753e-05, "loss": 0.5981, "step": 4809, "task_loss": 1.4003020524978638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36402013897895813, "epoch": 4.07, "learning_rate": 2.9670329670329673e-05, "loss": 0.5352, "step": 4810, "task_loss": 0.2539736032485962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2363978624343872, "epoch": 4.07, "learning_rate": 2.9666103127641592e-05, "loss": 0.5672, "step": 4811, "task_loss": 0.3959346413612366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3834642767906189, "epoch": 4.07, "learning_rate": 2.966187658495351e-05, "loss": 0.3952, "step": 4812, "task_loss": 1.3377703428268433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39688557386398315, "epoch": 4.07, "learning_rate": 2.965765004226543e-05, "loss": 0.5489, "step": 4813, "task_loss": 0.824809730052948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5481680631637573, "epoch": 4.07, "learning_rate": 2.9653423499577348e-05, "loss": 0.6434, "step": 4814, "task_loss": 0.7854126691818237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5738096833229065, "epoch": 4.07, "learning_rate": 2.9649196956889265e-05, "loss": 0.5896, "step": 4815, "task_loss": 0.18337300419807434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4052640199661255, "epoch": 4.07, "learning_rate": 2.9644970414201184e-05, "loss": 0.4623, "step": 4816, "task_loss": 0.24387280642986298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5931398868560791, "epoch": 4.07, "learning_rate": 2.9640743871513104e-05, "loss": 0.638, "step": 4817, "task_loss": 1.1006051301956177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.537839412689209, "epoch": 4.07, "learning_rate": 2.9636517328825024e-05, "loss": 0.4506, "step": 4818, "task_loss": 0.612963080406189 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.760231614112854, "epoch": 4.07, "learning_rate": 2.963229078613694e-05, "loss": 0.4802, "step": 4819, "task_loss": 0.6048200726509094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5098378658294678, "epoch": 4.07, "learning_rate": 2.962806424344886e-05, "loss": 0.4477, "step": 4820, "task_loss": 0.5537385940551758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38817596435546875, "epoch": 4.08, "learning_rate": 2.9623837700760783e-05, "loss": 0.5129, "step": 4821, "task_loss": 0.5576894283294678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4731392562389374, "epoch": 4.08, "learning_rate": 2.9619611158072696e-05, "loss": 0.706, "step": 4822, "task_loss": 0.5801501274108887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4706273376941681, "epoch": 4.08, "learning_rate": 2.9615384615384616e-05, "loss": 0.5677, "step": 4823, "task_loss": 0.632493793964386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4360937476158142, "epoch": 4.08, "learning_rate": 2.961115807269654e-05, "loss": 0.5073, "step": 4824, "task_loss": 0.4308849275112152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5204259157180786, "epoch": 4.08, "learning_rate": 2.9606931530008452e-05, "loss": 0.5955, "step": 4825, "task_loss": 0.6966062188148499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8863681554794312, "epoch": 4.08, "learning_rate": 2.9602704987320375e-05, "loss": 0.5645, "step": 4826, "task_loss": 0.6026942729949951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3148103952407837, "epoch": 4.08, "learning_rate": 2.9598478444632295e-05, "loss": 0.5269, "step": 4827, "task_loss": 0.35631802678108215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5453001856803894, "epoch": 4.08, "learning_rate": 2.9594251901944208e-05, "loss": 0.6453, "step": 4828, "task_loss": 0.5687766075134277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36892998218536377, "epoch": 4.08, "learning_rate": 2.959002535925613e-05, "loss": 0.5251, "step": 4829, "task_loss": 0.6104440093040466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39301788806915283, "epoch": 4.08, "learning_rate": 2.958579881656805e-05, "loss": 0.4952, "step": 4830, "task_loss": 0.9034949541091919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34641480445861816, "epoch": 4.08, "learning_rate": 2.9581572273879963e-05, "loss": 0.5685, "step": 4831, "task_loss": 0.6105297207832336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6269800662994385, "epoch": 4.08, "learning_rate": 2.9577345731191887e-05, "loss": 0.5792, "step": 4832, "task_loss": 1.1485555171966553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4222157895565033, "epoch": 4.09, "learning_rate": 2.9573119188503806e-05, "loss": 0.6112, "step": 4833, "task_loss": 1.414275050163269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3817088305950165, "epoch": 4.09, "learning_rate": 2.9568892645815726e-05, "loss": 0.4512, "step": 4834, "task_loss": 0.7417781949043274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4314614534378052, "epoch": 4.09, "learning_rate": 2.9564666103127642e-05, "loss": 0.4335, "step": 4835, "task_loss": 0.5572116374969482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.669198751449585, "epoch": 4.09, "learning_rate": 2.9560439560439562e-05, "loss": 0.7072, "step": 4836, "task_loss": 1.3890420198440552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42207905650138855, "epoch": 4.09, "learning_rate": 2.9556213017751482e-05, "loss": 0.516, "step": 4837, "task_loss": 0.761085033416748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7370420694351196, "epoch": 4.09, "learning_rate": 2.9551986475063398e-05, "loss": 0.7362, "step": 4838, "task_loss": 1.9911420345306396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5360132455825806, "epoch": 4.09, "learning_rate": 2.9547759932375318e-05, "loss": 0.6671, "step": 4839, "task_loss": 0.6991643309593201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6822996139526367, "epoch": 4.09, "learning_rate": 2.9543533389687238e-05, "loss": 0.5954, "step": 4840, "task_loss": 0.5343037247657776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38074225187301636, "epoch": 4.09, "learning_rate": 2.9539306846999154e-05, "loss": 0.5714, "step": 4841, "task_loss": 0.8787013292312622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5015571117401123, "epoch": 4.09, "learning_rate": 2.9535080304311074e-05, "loss": 0.6689, "step": 4842, "task_loss": 0.6182490587234497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3814687430858612, "epoch": 4.09, "learning_rate": 2.9530853761622997e-05, "loss": 0.4261, "step": 4843, "task_loss": 0.8015026450157166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6589394807815552, "epoch": 4.09, "learning_rate": 2.952662721893491e-05, "loss": 0.5177, "step": 4844, "task_loss": 0.6222488880157471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6663868427276611, "epoch": 4.1, "learning_rate": 2.952240067624683e-05, "loss": 0.5494, "step": 4845, "task_loss": 1.3003631830215454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.584073543548584, "epoch": 4.1, "learning_rate": 2.9518174133558753e-05, "loss": 0.6328, "step": 4846, "task_loss": 0.6526092886924744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7385438680648804, "epoch": 4.1, "learning_rate": 2.9513947590870672e-05, "loss": 0.487, "step": 4847, "task_loss": 0.4470556974411011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4144344925880432, "epoch": 4.1, "learning_rate": 2.9509721048182585e-05, "loss": 0.6132, "step": 4848, "task_loss": 0.8664945363998413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7791794538497925, "epoch": 4.1, "learning_rate": 2.950549450549451e-05, "loss": 0.6175, "step": 4849, "task_loss": 0.5026871562004089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5542319416999817, "epoch": 4.1, "learning_rate": 2.9501267962806428e-05, "loss": 0.5481, "step": 4850, "task_loss": 0.5554592609405518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32958918809890747, "epoch": 4.1, "learning_rate": 2.9497041420118345e-05, "loss": 0.4215, "step": 4851, "task_loss": 0.2574290931224823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4067208766937256, "epoch": 4.1, "learning_rate": 2.9492814877430264e-05, "loss": 0.5335, "step": 4852, "task_loss": 0.6319681406021118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35255634784698486, "epoch": 4.1, "learning_rate": 2.9488588334742184e-05, "loss": 0.462, "step": 4853, "task_loss": 0.22451795637607574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5688884258270264, "epoch": 4.1, "learning_rate": 2.94843617920541e-05, "loss": 0.5155, "step": 4854, "task_loss": 0.1819257140159607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5997670292854309, "epoch": 4.1, "learning_rate": 2.948013524936602e-05, "loss": 0.7225, "step": 4855, "task_loss": 0.46337664127349854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45964354276657104, "epoch": 4.1, "learning_rate": 2.947590870667794e-05, "loss": 0.4617, "step": 4856, "task_loss": 0.6211599111557007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9337481260299683, "epoch": 4.11, "learning_rate": 2.9471682163989856e-05, "loss": 0.6404, "step": 4857, "task_loss": 0.7539032697677612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9196542501449585, "epoch": 4.11, "learning_rate": 2.9467455621301776e-05, "loss": 0.7256, "step": 4858, "task_loss": 1.547803521156311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.670647144317627, "epoch": 4.11, "learning_rate": 2.9463229078613696e-05, "loss": 0.4658, "step": 4859, "task_loss": 0.6859316229820251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8582586050033569, "epoch": 4.11, "learning_rate": 2.9459002535925612e-05, "loss": 0.5858, "step": 4860, "task_loss": 0.40182241797447205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6735626459121704, "epoch": 4.11, "learning_rate": 2.9454775993237532e-05, "loss": 0.5826, "step": 4861, "task_loss": 1.1858372688293457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7336824536323547, "epoch": 4.11, "learning_rate": 2.945054945054945e-05, "loss": 0.6855, "step": 4862, "task_loss": 1.2073827981948853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6279140114784241, "epoch": 4.11, "learning_rate": 2.9446322907861375e-05, "loss": 0.4896, "step": 4863, "task_loss": 1.4833605289459229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4476397633552551, "epoch": 4.11, "learning_rate": 2.9442096365173288e-05, "loss": 0.462, "step": 4864, "task_loss": 0.9270361661911011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0851538181304932, "epoch": 4.11, "learning_rate": 2.9437869822485207e-05, "loss": 0.7726, "step": 4865, "task_loss": 0.8774154782295227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4475991129875183, "epoch": 4.11, "learning_rate": 2.943364327979713e-05, "loss": 0.4685, "step": 4866, "task_loss": 0.8371140360832214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6269153952598572, "epoch": 4.11, "learning_rate": 2.9429416737109043e-05, "loss": 0.5023, "step": 4867, "task_loss": 0.9607031345367432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49514877796173096, "epoch": 4.11, "learning_rate": 2.9425190194420967e-05, "loss": 0.5629, "step": 4868, "task_loss": 1.8721123933792114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16147325932979584, "epoch": 4.12, "learning_rate": 2.9420963651732886e-05, "loss": 0.4341, "step": 4869, "task_loss": 0.048075880855321884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7240965366363525, "epoch": 4.12, "learning_rate": 2.94167371090448e-05, "loss": 0.6844, "step": 4870, "task_loss": 0.9053167700767517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5797220468521118, "epoch": 4.12, "learning_rate": 2.9412510566356722e-05, "loss": 0.6192, "step": 4871, "task_loss": 0.602137565612793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6932028532028198, "epoch": 4.12, "learning_rate": 2.9408284023668642e-05, "loss": 0.5983, "step": 4872, "task_loss": 0.8830226063728333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5245742797851562, "epoch": 4.12, "learning_rate": 2.940405748098056e-05, "loss": 0.3909, "step": 4873, "task_loss": 0.4560994505882263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4908671975135803, "epoch": 4.12, "learning_rate": 2.9399830938292478e-05, "loss": 0.4516, "step": 4874, "task_loss": 0.7425429224967957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5652753114700317, "epoch": 4.12, "learning_rate": 2.9395604395604398e-05, "loss": 0.6313, "step": 4875, "task_loss": 0.33220943808555603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48626336455345154, "epoch": 4.12, "learning_rate": 2.9391377852916318e-05, "loss": 0.442, "step": 4876, "task_loss": 0.45793113112449646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7293793559074402, "epoch": 4.12, "learning_rate": 2.9387151310228234e-05, "loss": 0.5456, "step": 4877, "task_loss": 1.1115078926086426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5300695896148682, "epoch": 4.12, "learning_rate": 2.9382924767540154e-05, "loss": 0.5893, "step": 4878, "task_loss": 0.3955920934677124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35069793462753296, "epoch": 4.12, "learning_rate": 2.9378698224852074e-05, "loss": 0.542, "step": 4879, "task_loss": 0.39194342494010925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.311199814081192, "epoch": 4.13, "learning_rate": 2.937447168216399e-05, "loss": 0.5636, "step": 4880, "task_loss": 0.1707916259765625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31814074516296387, "epoch": 4.13, "learning_rate": 2.937024513947591e-05, "loss": 0.4404, "step": 4881, "task_loss": 0.18268778920173645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6300954222679138, "epoch": 4.13, "learning_rate": 2.936601859678783e-05, "loss": 0.6501, "step": 4882, "task_loss": 1.3882330656051636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.305836945772171, "epoch": 4.13, "learning_rate": 2.9361792054099746e-05, "loss": 0.5197, "step": 4883, "task_loss": 1.227432131767273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6732820272445679, "epoch": 4.13, "learning_rate": 2.9357565511411666e-05, "loss": 0.583, "step": 4884, "task_loss": 0.816556990146637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5725735425949097, "epoch": 4.13, "learning_rate": 2.935333896872359e-05, "loss": 0.455, "step": 4885, "task_loss": 0.1542321741580963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5702890157699585, "epoch": 4.13, "learning_rate": 2.93491124260355e-05, "loss": 0.5776, "step": 4886, "task_loss": 0.2596447765827179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33251917362213135, "epoch": 4.13, "learning_rate": 2.934488588334742e-05, "loss": 0.3957, "step": 4887, "task_loss": 0.1093897745013237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4518659710884094, "epoch": 4.13, "learning_rate": 2.9340659340659344e-05, "loss": 0.5479, "step": 4888, "task_loss": 0.7237752079963684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36843740940093994, "epoch": 4.13, "learning_rate": 2.9336432797971257e-05, "loss": 0.4508, "step": 4889, "task_loss": 0.15715515613555908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4993230104446411, "epoch": 4.13, "learning_rate": 2.933220625528318e-05, "loss": 0.6884, "step": 4890, "task_loss": 1.0155936479568481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4642319083213806, "epoch": 4.13, "learning_rate": 2.93279797125951e-05, "loss": 0.6264, "step": 4891, "task_loss": 0.5515473484992981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5643899440765381, "epoch": 4.14, "learning_rate": 2.932375316990702e-05, "loss": 0.5864, "step": 4892, "task_loss": 0.9507386684417725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5338196754455566, "epoch": 4.14, "learning_rate": 2.9319526627218936e-05, "loss": 0.7019, "step": 4893, "task_loss": 1.1920231580734253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6536885499954224, "epoch": 4.14, "learning_rate": 2.9315300084530856e-05, "loss": 0.6259, "step": 4894, "task_loss": 0.6254956722259521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5598927736282349, "epoch": 4.14, "learning_rate": 2.9311073541842776e-05, "loss": 0.5207, "step": 4895, "task_loss": 0.8743888139724731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8147443532943726, "epoch": 4.14, "learning_rate": 2.9306846999154692e-05, "loss": 0.4724, "step": 4896, "task_loss": 0.9306342005729675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4392967224121094, "epoch": 4.14, "learning_rate": 2.9302620456466612e-05, "loss": 0.5611, "step": 4897, "task_loss": 1.0989532470703125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.672174334526062, "epoch": 4.14, "learning_rate": 2.929839391377853e-05, "loss": 0.7541, "step": 4898, "task_loss": 1.0243942737579346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3433629274368286, "epoch": 4.14, "learning_rate": 2.9294167371090448e-05, "loss": 0.4786, "step": 4899, "task_loss": 0.07585060596466064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5659266710281372, "epoch": 4.14, "learning_rate": 2.9289940828402368e-05, "loss": 0.5755, "step": 4900, "task_loss": 0.6905208230018616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6160291433334351, "epoch": 4.14, "learning_rate": 2.9285714285714288e-05, "loss": 0.5173, "step": 4901, "task_loss": 0.7377527952194214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3513527512550354, "epoch": 4.14, "learning_rate": 2.9281487743026204e-05, "loss": 0.4925, "step": 4902, "task_loss": 0.9227238297462463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3689699172973633, "epoch": 4.14, "learning_rate": 2.9277261200338124e-05, "loss": 0.5694, "step": 4903, "task_loss": 0.502204954624176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3335379958152771, "epoch": 4.15, "learning_rate": 2.9273034657650043e-05, "loss": 0.396, "step": 4904, "task_loss": 0.2030128389596939 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3724108040332794, "epoch": 4.15, "learning_rate": 2.9268808114961966e-05, "loss": 0.3658, "step": 4905, "task_loss": 0.30825191736221313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6078504323959351, "epoch": 4.15, "learning_rate": 2.926458157227388e-05, "loss": 0.8306, "step": 4906, "task_loss": 0.8077676296234131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4799591898918152, "epoch": 4.15, "learning_rate": 2.9260355029585803e-05, "loss": 0.5028, "step": 4907, "task_loss": 0.646205484867096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6094024181365967, "epoch": 4.15, "learning_rate": 2.9256128486897722e-05, "loss": 0.6137, "step": 4908, "task_loss": 0.3010289669036865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5203437209129333, "epoch": 4.15, "learning_rate": 2.9251901944209635e-05, "loss": 0.6958, "step": 4909, "task_loss": 0.4879305362701416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5272976160049438, "epoch": 4.15, "learning_rate": 2.924767540152156e-05, "loss": 0.5658, "step": 4910, "task_loss": 0.7172080278396606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.604246199131012, "epoch": 4.15, "learning_rate": 2.9243448858833478e-05, "loss": 0.544, "step": 4911, "task_loss": 0.9679908752441406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.368024080991745, "epoch": 4.15, "learning_rate": 2.923922231614539e-05, "loss": 0.464, "step": 4912, "task_loss": 0.7688194513320923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9912531971931458, "epoch": 4.15, "learning_rate": 2.9234995773457314e-05, "loss": 0.7213, "step": 4913, "task_loss": 0.8605411052703857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7077438831329346, "epoch": 4.15, "learning_rate": 2.9230769230769234e-05, "loss": 0.6176, "step": 4914, "task_loss": 0.9759804606437683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.266655296087265, "epoch": 4.15, "learning_rate": 2.922654268808115e-05, "loss": 0.3885, "step": 4915, "task_loss": 0.04503697156906128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5361075401306152, "epoch": 4.16, "learning_rate": 2.922231614539307e-05, "loss": 0.5723, "step": 4916, "task_loss": 0.4291464388370514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5263949036598206, "epoch": 4.16, "learning_rate": 2.921808960270499e-05, "loss": 0.5451, "step": 4917, "task_loss": 0.5300502181053162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44350963830947876, "epoch": 4.16, "learning_rate": 2.9213863060016906e-05, "loss": 0.605, "step": 4918, "task_loss": 0.7421781420707703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5925217866897583, "epoch": 4.16, "learning_rate": 2.9209636517328826e-05, "loss": 0.6239, "step": 4919, "task_loss": 0.6273420453071594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46450141072273254, "epoch": 4.16, "learning_rate": 2.9205409974640746e-05, "loss": 0.5079, "step": 4920, "task_loss": 0.5449296832084656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5474029183387756, "epoch": 4.16, "learning_rate": 2.9201183431952665e-05, "loss": 0.6028, "step": 4921, "task_loss": 0.6845722198486328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3770174980163574, "epoch": 4.16, "learning_rate": 2.919695688926458e-05, "loss": 0.5507, "step": 4922, "task_loss": 0.7765823602676392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4429907500743866, "epoch": 4.16, "learning_rate": 2.91927303465765e-05, "loss": 0.5638, "step": 4923, "task_loss": 0.6939391493797302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44498199224472046, "epoch": 4.16, "learning_rate": 2.9188503803888425e-05, "loss": 0.4144, "step": 4924, "task_loss": 0.47263678908348083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.643986165523529, "epoch": 4.16, "learning_rate": 2.9184277261200338e-05, "loss": 0.4883, "step": 4925, "task_loss": 0.41211098432540894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29165083169937134, "epoch": 4.16, "learning_rate": 2.9180050718512257e-05, "loss": 0.3935, "step": 4926, "task_loss": 0.3426480293273926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29551804065704346, "epoch": 4.16, "learning_rate": 2.917582417582418e-05, "loss": 0.5643, "step": 4927, "task_loss": 0.3788049817085266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39327535033226013, "epoch": 4.17, "learning_rate": 2.9171597633136093e-05, "loss": 0.4624, "step": 4928, "task_loss": 0.1327672153711319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.406957745552063, "epoch": 4.17, "learning_rate": 2.9167371090448013e-05, "loss": 0.4637, "step": 4929, "task_loss": 0.651806652545929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5947275161743164, "epoch": 4.17, "learning_rate": 2.9163144547759936e-05, "loss": 0.6036, "step": 4930, "task_loss": 0.6267604231834412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3165002763271332, "epoch": 4.17, "learning_rate": 2.915891800507185e-05, "loss": 0.4925, "step": 4931, "task_loss": 1.1059143543243408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6140713691711426, "epoch": 4.17, "learning_rate": 2.9154691462383772e-05, "loss": 0.6231, "step": 4932, "task_loss": 0.8499069213867188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3954935669898987, "epoch": 4.17, "learning_rate": 2.9150464919695692e-05, "loss": 0.6254, "step": 4933, "task_loss": 0.9685243368148804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23880460858345032, "epoch": 4.17, "learning_rate": 2.9146238377007605e-05, "loss": 0.716, "step": 4934, "task_loss": 0.049141138792037964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.657178521156311, "epoch": 4.17, "learning_rate": 2.9142011834319528e-05, "loss": 0.5471, "step": 4935, "task_loss": 0.7862703800201416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16087676584720612, "epoch": 4.17, "learning_rate": 2.9137785291631448e-05, "loss": 0.4434, "step": 4936, "task_loss": 0.021729743108153343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5415525436401367, "epoch": 4.17, "learning_rate": 2.9133558748943368e-05, "loss": 0.5249, "step": 4937, "task_loss": 1.0101770162582397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37750259041786194, "epoch": 4.17, "learning_rate": 2.9129332206255284e-05, "loss": 0.5025, "step": 4938, "task_loss": 0.8420785069465637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4695941209793091, "epoch": 4.17, "learning_rate": 2.9125105663567204e-05, "loss": 0.6695, "step": 4939, "task_loss": 0.8085657358169556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.481032133102417, "epoch": 4.18, "learning_rate": 2.9120879120879123e-05, "loss": 0.5884, "step": 4940, "task_loss": 1.667453646659851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5970499515533447, "epoch": 4.18, "learning_rate": 2.911665257819104e-05, "loss": 0.4295, "step": 4941, "task_loss": 0.460021048784256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35698890686035156, "epoch": 4.18, "learning_rate": 2.911242603550296e-05, "loss": 0.4166, "step": 4942, "task_loss": 0.7841063141822815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38521382212638855, "epoch": 4.18, "learning_rate": 2.910819949281488e-05, "loss": 0.4549, "step": 4943, "task_loss": 0.7064574956893921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38757622241973877, "epoch": 4.18, "learning_rate": 2.9103972950126796e-05, "loss": 0.5376, "step": 4944, "task_loss": 0.41958001255989075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5023869872093201, "epoch": 4.18, "learning_rate": 2.9099746407438715e-05, "loss": 0.4174, "step": 4945, "task_loss": 0.9790821075439453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5780279040336609, "epoch": 4.18, "learning_rate": 2.9095519864750635e-05, "loss": 0.5833, "step": 4946, "task_loss": 0.16638527810573578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5554252862930298, "epoch": 4.18, "learning_rate": 2.909129332206255e-05, "loss": 0.5079, "step": 4947, "task_loss": 0.5973990559577942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5831525921821594, "epoch": 4.18, "learning_rate": 2.908706677937447e-05, "loss": 0.4806, "step": 4948, "task_loss": 0.5166064500808716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6369892954826355, "epoch": 4.18, "learning_rate": 2.9082840236686394e-05, "loss": 0.554, "step": 4949, "task_loss": 1.5615217685699463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.55471271276474, "epoch": 4.18, "learning_rate": 2.9078613693998314e-05, "loss": 0.4924, "step": 4950, "task_loss": 0.8947004079818726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29818692803382874, "epoch": 4.19, "learning_rate": 2.9074387151310227e-05, "loss": 0.4874, "step": 4951, "task_loss": 0.04880141094326973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5417750477790833, "epoch": 4.19, "learning_rate": 2.907016060862215e-05, "loss": 0.444, "step": 4952, "task_loss": 0.8822089433670044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.523271381855011, "epoch": 4.19, "learning_rate": 2.906593406593407e-05, "loss": 0.5113, "step": 4953, "task_loss": 1.0857772827148438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5274238586425781, "epoch": 4.19, "learning_rate": 2.9061707523245986e-05, "loss": 0.5191, "step": 4954, "task_loss": 0.7283214330673218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7125116586685181, "epoch": 4.19, "learning_rate": 2.9057480980557906e-05, "loss": 0.61, "step": 4955, "task_loss": 0.6378865838050842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2907133102416992, "epoch": 4.19, "learning_rate": 2.9053254437869826e-05, "loss": 0.6648, "step": 4956, "task_loss": 0.9520096778869629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6296253204345703, "epoch": 4.19, "learning_rate": 2.9049027895181742e-05, "loss": 0.5065, "step": 4957, "task_loss": 0.3367064595222473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3917253613471985, "epoch": 4.19, "learning_rate": 2.9044801352493662e-05, "loss": 0.548, "step": 4958, "task_loss": 0.5287610292434692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5130088925361633, "epoch": 4.19, "learning_rate": 2.904057480980558e-05, "loss": 0.5093, "step": 4959, "task_loss": 0.29144880175590515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6891018152236938, "epoch": 4.19, "learning_rate": 2.9036348267117498e-05, "loss": 0.4839, "step": 4960, "task_loss": 0.2483626902103424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23162618279457092, "epoch": 4.19, "learning_rate": 2.9032121724429418e-05, "loss": 0.4695, "step": 4961, "task_loss": 1.7705329656600952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5970432758331299, "epoch": 4.19, "learning_rate": 2.9027895181741337e-05, "loss": 0.609, "step": 4962, "task_loss": 0.8749106526374817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4535556137561798, "epoch": 4.2, "learning_rate": 2.9023668639053254e-05, "loss": 0.554, "step": 4963, "task_loss": 0.5557235479354858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28293246030807495, "epoch": 4.2, "learning_rate": 2.9019442096365173e-05, "loss": 0.5694, "step": 4964, "task_loss": 0.5637669563293457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5207484364509583, "epoch": 4.2, "learning_rate": 2.9015215553677093e-05, "loss": 0.5387, "step": 4965, "task_loss": 0.42689841985702515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3967847228050232, "epoch": 4.2, "learning_rate": 2.9010989010989016e-05, "loss": 0.5142, "step": 4966, "task_loss": 0.8248480558395386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8131296634674072, "epoch": 4.2, "learning_rate": 2.900676246830093e-05, "loss": 0.6822, "step": 4967, "task_loss": 0.9920462369918823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23411710560321808, "epoch": 4.2, "learning_rate": 2.900253592561285e-05, "loss": 0.5174, "step": 4968, "task_loss": 0.4750393331050873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47619348764419556, "epoch": 4.2, "learning_rate": 2.8998309382924772e-05, "loss": 0.5657, "step": 4969, "task_loss": 0.44457554817199707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0993400812149048, "epoch": 4.2, "learning_rate": 2.8994082840236685e-05, "loss": 0.8597, "step": 4970, "task_loss": 1.513477087020874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5288493633270264, "epoch": 4.2, "learning_rate": 2.8989856297548608e-05, "loss": 0.4473, "step": 4971, "task_loss": 0.5993368029594421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5062902569770813, "epoch": 4.2, "learning_rate": 2.8985629754860528e-05, "loss": 0.5725, "step": 4972, "task_loss": 0.8104445934295654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7969005107879639, "epoch": 4.2, "learning_rate": 2.898140321217244e-05, "loss": 0.6367, "step": 4973, "task_loss": 0.9035048484802246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7410600185394287, "epoch": 4.2, "learning_rate": 2.8977176669484364e-05, "loss": 0.5603, "step": 4974, "task_loss": 0.8375561237335205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3379054069519043, "epoch": 4.21, "learning_rate": 2.8972950126796284e-05, "loss": 0.4879, "step": 4975, "task_loss": 0.5272780656814575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38761106133461, "epoch": 4.21, "learning_rate": 2.8968723584108197e-05, "loss": 0.5699, "step": 4976, "task_loss": 0.515591561794281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8695029020309448, "epoch": 4.21, "learning_rate": 2.896449704142012e-05, "loss": 0.6183, "step": 4977, "task_loss": 1.400099515914917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4410107731819153, "epoch": 4.21, "learning_rate": 2.896027049873204e-05, "loss": 0.5514, "step": 4978, "task_loss": 0.16848812997341156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5139065384864807, "epoch": 4.21, "learning_rate": 2.895604395604396e-05, "loss": 0.4932, "step": 4979, "task_loss": 0.7612088322639465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3685995936393738, "epoch": 4.21, "learning_rate": 2.8951817413355876e-05, "loss": 0.5016, "step": 4980, "task_loss": 0.49263593554496765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40824630856513977, "epoch": 4.21, "learning_rate": 2.8947590870667795e-05, "loss": 0.7652, "step": 4981, "task_loss": 0.26346105337142944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4100915789604187, "epoch": 4.21, "learning_rate": 2.8943364327979715e-05, "loss": 0.3495, "step": 4982, "task_loss": 1.1683602333068848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5771180391311646, "epoch": 4.21, "learning_rate": 2.893913778529163e-05, "loss": 0.5585, "step": 4983, "task_loss": 0.6446143388748169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0801568031311035, "epoch": 4.21, "learning_rate": 2.893491124260355e-05, "loss": 0.8557, "step": 4984, "task_loss": 1.6549850702285767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6279933452606201, "epoch": 4.21, "learning_rate": 2.893068469991547e-05, "loss": 0.5336, "step": 4985, "task_loss": 0.9761338233947754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6012387275695801, "epoch": 4.21, "learning_rate": 2.8926458157227387e-05, "loss": 0.7233, "step": 4986, "task_loss": 0.8339069485664368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4568895995616913, "epoch": 4.22, "learning_rate": 2.8922231614539307e-05, "loss": 0.5408, "step": 4987, "task_loss": 1.250498652458191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.874041736125946, "epoch": 4.22, "learning_rate": 2.891800507185123e-05, "loss": 0.5565, "step": 4988, "task_loss": 1.4179728031158447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49423927068710327, "epoch": 4.22, "learning_rate": 2.8913778529163143e-05, "loss": 0.6198, "step": 4989, "task_loss": 0.7868390679359436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43859049677848816, "epoch": 4.22, "learning_rate": 2.8909551986475063e-05, "loss": 0.5441, "step": 4990, "task_loss": 0.7789090871810913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2009529322385788, "epoch": 4.22, "learning_rate": 2.8905325443786986e-05, "loss": 0.4136, "step": 4991, "task_loss": 0.06924349069595337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5552215576171875, "epoch": 4.22, "learning_rate": 2.89010989010989e-05, "loss": 0.5511, "step": 4992, "task_loss": 0.6917145848274231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27759259939193726, "epoch": 4.22, "learning_rate": 2.889687235841082e-05, "loss": 0.3879, "step": 4993, "task_loss": 0.2607293128967285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8054690957069397, "epoch": 4.22, "learning_rate": 2.8892645815722742e-05, "loss": 0.503, "step": 4994, "task_loss": 1.4566730260849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48486143350601196, "epoch": 4.22, "learning_rate": 2.888841927303466e-05, "loss": 0.5523, "step": 4995, "task_loss": 0.975770890712738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27125826478004456, "epoch": 4.22, "learning_rate": 2.8884192730346578e-05, "loss": 0.553, "step": 4996, "task_loss": 0.5320665836334229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2912842333316803, "epoch": 4.22, "learning_rate": 2.8879966187658498e-05, "loss": 0.5848, "step": 4997, "task_loss": 0.5996488332748413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3529224395751953, "epoch": 4.22, "learning_rate": 2.8875739644970417e-05, "loss": 0.5161, "step": 4998, "task_loss": 0.22224129736423492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9461209774017334, "epoch": 4.23, "learning_rate": 2.8871513102282334e-05, "loss": 0.5461, "step": 4999, "task_loss": 0.9298988580703735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.363048255443573, "epoch": 4.23, "learning_rate": 2.8867286559594254e-05, "loss": 0.4928, "step": 5000, "task_loss": 0.7944290637969971 }, { "epoch": 4.23, "eval_accuracy": 0.9035643564356436, "eval_loss": 0.3522135317325592, "eval_runtime": 229.0744, "eval_samples_per_second": 110.226, "eval_steps_per_second": 0.864, "step": 5000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5424635410308838, "epoch": 4.23, "learning_rate": 2.8863060016906173e-05, "loss": 0.5581, "step": 5001, "task_loss": 0.7681723237037659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5082840323448181, "epoch": 4.23, "learning_rate": 2.885883347421809e-05, "loss": 0.6123, "step": 5002, "task_loss": 0.31173062324523926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3281029462814331, "epoch": 4.23, "learning_rate": 2.885460693153001e-05, "loss": 0.5049, "step": 5003, "task_loss": 0.37610554695129395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5271156430244446, "epoch": 4.23, "learning_rate": 2.885038038884193e-05, "loss": 0.5752, "step": 5004, "task_loss": 0.5435252785682678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3205392062664032, "epoch": 4.23, "learning_rate": 2.8846153846153845e-05, "loss": 0.4256, "step": 5005, "task_loss": 0.34773632884025574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.416623055934906, "epoch": 4.23, "learning_rate": 2.8841927303465765e-05, "loss": 0.6099, "step": 5006, "task_loss": 0.2906308174133301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45569419860839844, "epoch": 4.23, "learning_rate": 2.8837700760777685e-05, "loss": 0.4321, "step": 5007, "task_loss": 0.5803009867668152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37213602662086487, "epoch": 4.23, "learning_rate": 2.8833474218089608e-05, "loss": 0.4653, "step": 5008, "task_loss": 0.07518910616636276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5695041418075562, "epoch": 4.23, "learning_rate": 2.882924767540152e-05, "loss": 0.487, "step": 5009, "task_loss": 0.404824435710907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6915467381477356, "epoch": 4.23, "learning_rate": 2.882502113271344e-05, "loss": 0.6883, "step": 5010, "task_loss": 0.7848670482635498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39997023344039917, "epoch": 4.24, "learning_rate": 2.8820794590025364e-05, "loss": 0.4293, "step": 5011, "task_loss": 0.4946521818637848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4524044096469879, "epoch": 4.24, "learning_rate": 2.8816568047337277e-05, "loss": 0.4829, "step": 5012, "task_loss": 0.7310373187065125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5082722902297974, "epoch": 4.24, "learning_rate": 2.88123415046492e-05, "loss": 0.4349, "step": 5013, "task_loss": 0.5728713274002075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5411403179168701, "epoch": 4.24, "learning_rate": 2.880811496196112e-05, "loss": 0.64, "step": 5014, "task_loss": 0.595592200756073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5737406611442566, "epoch": 4.24, "learning_rate": 2.8803888419273033e-05, "loss": 0.5593, "step": 5015, "task_loss": 0.3804052472114563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3536869287490845, "epoch": 4.24, "learning_rate": 2.8799661876584956e-05, "loss": 0.5477, "step": 5016, "task_loss": 0.5726757049560547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35529932379722595, "epoch": 4.24, "learning_rate": 2.8795435333896876e-05, "loss": 0.4997, "step": 5017, "task_loss": 0.9764136075973511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7777359485626221, "epoch": 4.24, "learning_rate": 2.8791208791208792e-05, "loss": 0.5859, "step": 5018, "task_loss": 0.6851890087127686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.544541597366333, "epoch": 4.24, "learning_rate": 2.878698224852071e-05, "loss": 0.536, "step": 5019, "task_loss": 0.9854859709739685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5944560766220093, "epoch": 4.24, "learning_rate": 2.878275570583263e-05, "loss": 0.4635, "step": 5020, "task_loss": 0.2814624011516571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35691067576408386, "epoch": 4.24, "learning_rate": 2.8778529163144548e-05, "loss": 0.4022, "step": 5021, "task_loss": 0.2276492565870285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33211642503738403, "epoch": 4.24, "learning_rate": 2.8774302620456467e-05, "loss": 0.4664, "step": 5022, "task_loss": 1.2398892641067505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5359298586845398, "epoch": 4.25, "learning_rate": 2.8770076077768387e-05, "loss": 0.3557, "step": 5023, "task_loss": 0.5677745342254639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.598246693611145, "epoch": 4.25, "learning_rate": 2.8765849535080307e-05, "loss": 0.6645, "step": 5024, "task_loss": 0.45430052280426025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5481970906257629, "epoch": 4.25, "learning_rate": 2.8761622992392223e-05, "loss": 0.6308, "step": 5025, "task_loss": 1.0280213356018066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5252529382705688, "epoch": 4.25, "learning_rate": 2.8757396449704143e-05, "loss": 0.5848, "step": 5026, "task_loss": 0.3855023980140686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4001982808113098, "epoch": 4.25, "learning_rate": 2.8753169907016063e-05, "loss": 0.5481, "step": 5027, "task_loss": 0.2155093550682068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5002865195274353, "epoch": 4.25, "learning_rate": 2.874894336432798e-05, "loss": 0.5, "step": 5028, "task_loss": 0.6530987620353699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7264885902404785, "epoch": 4.25, "learning_rate": 2.87447168216399e-05, "loss": 0.5128, "step": 5029, "task_loss": 1.5708767175674438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7406023740768433, "epoch": 4.25, "learning_rate": 2.8740490278951822e-05, "loss": 0.5352, "step": 5030, "task_loss": 0.8087359070777893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5345301628112793, "epoch": 4.25, "learning_rate": 2.8736263736263735e-05, "loss": 0.5976, "step": 5031, "task_loss": 1.0272800922393799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4186236262321472, "epoch": 4.25, "learning_rate": 2.8732037193575655e-05, "loss": 0.5201, "step": 5032, "task_loss": 0.10075681656599045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5343166589736938, "epoch": 4.25, "learning_rate": 2.8727810650887578e-05, "loss": 0.4477, "step": 5033, "task_loss": 0.7180424928665161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6384636163711548, "epoch": 4.26, "learning_rate": 2.872358410819949e-05, "loss": 0.5919, "step": 5034, "task_loss": 0.8868371844291687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4453398287296295, "epoch": 4.26, "learning_rate": 2.8719357565511414e-05, "loss": 0.541, "step": 5035, "task_loss": 0.7494087219238281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7095698118209839, "epoch": 4.26, "learning_rate": 2.8715131022823334e-05, "loss": 0.4759, "step": 5036, "task_loss": 1.0688354969024658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9623632431030273, "epoch": 4.26, "learning_rate": 2.8710904480135253e-05, "loss": 0.8396, "step": 5037, "task_loss": 0.34394949674606323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6313179135322571, "epoch": 4.26, "learning_rate": 2.870667793744717e-05, "loss": 0.5707, "step": 5038, "task_loss": 0.4269314408302307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6322230100631714, "epoch": 4.26, "learning_rate": 2.870245139475909e-05, "loss": 0.6423, "step": 5039, "task_loss": 0.7876618504524231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5772720575332642, "epoch": 4.26, "learning_rate": 2.869822485207101e-05, "loss": 0.4847, "step": 5040, "task_loss": 0.2415463775396347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0083447694778442, "epoch": 4.26, "learning_rate": 2.8693998309382926e-05, "loss": 0.523, "step": 5041, "task_loss": 0.7287866473197937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35244181752204895, "epoch": 4.26, "learning_rate": 2.8689771766694845e-05, "loss": 0.4214, "step": 5042, "task_loss": 0.5554124116897583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2813867926597595, "epoch": 4.26, "learning_rate": 2.8685545224006765e-05, "loss": 0.4834, "step": 5043, "task_loss": 0.31542858481407166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9355268478393555, "epoch": 4.26, "learning_rate": 2.868131868131868e-05, "loss": 0.6104, "step": 5044, "task_loss": 0.2900858223438263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5138185620307922, "epoch": 4.26, "learning_rate": 2.86770921386306e-05, "loss": 0.4551, "step": 5045, "task_loss": 0.3275415003299713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5506541132926941, "epoch": 4.27, "learning_rate": 2.867286559594252e-05, "loss": 0.4276, "step": 5046, "task_loss": 0.8579879999160767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3394980728626251, "epoch": 4.27, "learning_rate": 2.8668639053254437e-05, "loss": 0.6012, "step": 5047, "task_loss": 0.4949684739112854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6559955477714539, "epoch": 4.27, "learning_rate": 2.8664412510566357e-05, "loss": 0.703, "step": 5048, "task_loss": 0.6371138691902161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7527486085891724, "epoch": 4.27, "learning_rate": 2.8660185967878277e-05, "loss": 0.6325, "step": 5049, "task_loss": 0.6538408398628235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3338960111141205, "epoch": 4.27, "learning_rate": 2.8655959425190193e-05, "loss": 0.3403, "step": 5050, "task_loss": 0.719497561454773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41151633858680725, "epoch": 4.27, "learning_rate": 2.8651732882502113e-05, "loss": 0.611, "step": 5051, "task_loss": 0.6953022480010986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49113377928733826, "epoch": 4.27, "learning_rate": 2.8647506339814033e-05, "loss": 0.5068, "step": 5052, "task_loss": 0.5454524755477905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8189623355865479, "epoch": 4.27, "learning_rate": 2.8643279797125956e-05, "loss": 0.6825, "step": 5053, "task_loss": 1.085884928703308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5768108367919922, "epoch": 4.27, "learning_rate": 2.863905325443787e-05, "loss": 0.4658, "step": 5054, "task_loss": 1.254048228263855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6696983575820923, "epoch": 4.27, "learning_rate": 2.8634826711749792e-05, "loss": 0.5947, "step": 5055, "task_loss": 0.454205185174942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9175926446914673, "epoch": 4.27, "learning_rate": 2.863060016906171e-05, "loss": 0.5407, "step": 5056, "task_loss": 1.322234869003296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9288214445114136, "epoch": 4.27, "learning_rate": 2.8626373626373624e-05, "loss": 0.5564, "step": 5057, "task_loss": 1.1883230209350586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.310880184173584, "epoch": 4.28, "learning_rate": 2.8622147083685548e-05, "loss": 0.5495, "step": 5058, "task_loss": 0.13048161566257477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8108692169189453, "epoch": 4.28, "learning_rate": 2.8617920540997467e-05, "loss": 0.7225, "step": 5059, "task_loss": 0.5182772874832153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0437372922897339, "epoch": 4.28, "learning_rate": 2.8613693998309384e-05, "loss": 0.8408, "step": 5060, "task_loss": 1.1800932884216309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4520098567008972, "epoch": 4.28, "learning_rate": 2.8609467455621303e-05, "loss": 0.581, "step": 5061, "task_loss": 0.5899325609207153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29349690675735474, "epoch": 4.28, "learning_rate": 2.8605240912933223e-05, "loss": 0.5258, "step": 5062, "task_loss": 0.9250083565711975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5980914831161499, "epoch": 4.28, "learning_rate": 2.860101437024514e-05, "loss": 0.6895, "step": 5063, "task_loss": 0.852351188659668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43609872460365295, "epoch": 4.28, "learning_rate": 2.859678782755706e-05, "loss": 0.6501, "step": 5064, "task_loss": 0.5329897403717041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.601274847984314, "epoch": 4.28, "learning_rate": 2.859256128486898e-05, "loss": 0.5103, "step": 5065, "task_loss": 0.4264865517616272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.501763105392456, "epoch": 4.28, "learning_rate": 2.85883347421809e-05, "loss": 0.4882, "step": 5066, "task_loss": 1.025362253189087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4265732169151306, "epoch": 4.28, "learning_rate": 2.8584108199492815e-05, "loss": 0.5224, "step": 5067, "task_loss": 0.1147022619843483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48253557085990906, "epoch": 4.28, "learning_rate": 2.8579881656804735e-05, "loss": 0.606, "step": 5068, "task_loss": 1.0819717645645142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8059843182563782, "epoch": 4.28, "learning_rate": 2.8575655114116655e-05, "loss": 0.5613, "step": 5069, "task_loss": 0.7770708799362183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7644687294960022, "epoch": 4.29, "learning_rate": 2.857142857142857e-05, "loss": 0.6374, "step": 5070, "task_loss": 1.2870728969573975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39863288402557373, "epoch": 4.29, "learning_rate": 2.856720202874049e-05, "loss": 0.439, "step": 5071, "task_loss": 0.08547637611627579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3423424959182739, "epoch": 4.29, "learning_rate": 2.8562975486052414e-05, "loss": 0.4383, "step": 5072, "task_loss": 0.4734783172607422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.740888774394989, "epoch": 4.29, "learning_rate": 2.8558748943364327e-05, "loss": 0.808, "step": 5073, "task_loss": 0.8702861666679382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8916035890579224, "epoch": 4.29, "learning_rate": 2.8554522400676246e-05, "loss": 0.8385, "step": 5074, "task_loss": 0.7356163859367371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49436667561531067, "epoch": 4.29, "learning_rate": 2.855029585798817e-05, "loss": 0.7151, "step": 5075, "task_loss": 0.9598007798194885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3796423673629761, "epoch": 4.29, "learning_rate": 2.8546069315300083e-05, "loss": 0.5543, "step": 5076, "task_loss": 0.17260704934597015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6580232977867126, "epoch": 4.29, "learning_rate": 2.8541842772612006e-05, "loss": 0.6424, "step": 5077, "task_loss": 0.4667375087738037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4417484402656555, "epoch": 4.29, "learning_rate": 2.8537616229923925e-05, "loss": 0.4634, "step": 5078, "task_loss": 0.511508047580719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7601735591888428, "epoch": 4.29, "learning_rate": 2.853338968723584e-05, "loss": 0.5502, "step": 5079, "task_loss": 0.7513731122016907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3145574927330017, "epoch": 4.29, "learning_rate": 2.852916314454776e-05, "loss": 0.4365, "step": 5080, "task_loss": 0.38452383875846863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5880226492881775, "epoch": 4.29, "learning_rate": 2.852493660185968e-05, "loss": 0.4497, "step": 5081, "task_loss": 1.0433785915374756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4004688262939453, "epoch": 4.3, "learning_rate": 2.85207100591716e-05, "loss": 0.5593, "step": 5082, "task_loss": 0.4971187710762024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43154376745224, "epoch": 4.3, "learning_rate": 2.8516483516483517e-05, "loss": 0.5792, "step": 5083, "task_loss": 0.7321638464927673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38204774260520935, "epoch": 4.3, "learning_rate": 2.8512256973795437e-05, "loss": 0.5214, "step": 5084, "task_loss": 0.6594197154045105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27718475461006165, "epoch": 4.3, "learning_rate": 2.8508030431107357e-05, "loss": 0.4359, "step": 5085, "task_loss": 0.03347624093294144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.82254958152771, "epoch": 4.3, "learning_rate": 2.8503803888419273e-05, "loss": 0.5682, "step": 5086, "task_loss": 0.59958416223526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6809030175209045, "epoch": 4.3, "learning_rate": 2.8499577345731193e-05, "loss": 0.5768, "step": 5087, "task_loss": 1.0686261653900146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3533551096916199, "epoch": 4.3, "learning_rate": 2.8495350803043113e-05, "loss": 0.4384, "step": 5088, "task_loss": 0.8124313950538635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6623342633247375, "epoch": 4.3, "learning_rate": 2.849112426035503e-05, "loss": 0.5361, "step": 5089, "task_loss": 0.6109859943389893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3484307527542114, "epoch": 4.3, "learning_rate": 2.848689771766695e-05, "loss": 0.4501, "step": 5090, "task_loss": 0.48259633779525757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43353691697120667, "epoch": 4.3, "learning_rate": 2.848267117497887e-05, "loss": 0.5263, "step": 5091, "task_loss": 0.579473614692688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49614328145980835, "epoch": 4.3, "learning_rate": 2.8478444632290785e-05, "loss": 0.4894, "step": 5092, "task_loss": 0.628524899482727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5502766370773315, "epoch": 4.3, "learning_rate": 2.8474218089602705e-05, "loss": 0.5102, "step": 5093, "task_loss": 1.0286712646484375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.662949800491333, "epoch": 4.31, "learning_rate": 2.8469991546914628e-05, "loss": 0.5032, "step": 5094, "task_loss": 0.5948931574821472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3951289653778076, "epoch": 4.31, "learning_rate": 2.8465765004226547e-05, "loss": 0.5088, "step": 5095, "task_loss": 0.4640941023826599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.357793390750885, "epoch": 4.31, "learning_rate": 2.846153846153846e-05, "loss": 0.6069, "step": 5096, "task_loss": 1.0247195959091187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37486934661865234, "epoch": 4.31, "learning_rate": 2.8457311918850383e-05, "loss": 0.3958, "step": 5097, "task_loss": 0.11131393909454346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5342576503753662, "epoch": 4.31, "learning_rate": 2.8453085376162303e-05, "loss": 0.6418, "step": 5098, "task_loss": 0.7262894511222839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5514885187149048, "epoch": 4.31, "learning_rate": 2.844885883347422e-05, "loss": 0.6562, "step": 5099, "task_loss": 1.1110492944717407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40893006324768066, "epoch": 4.31, "learning_rate": 2.844463229078614e-05, "loss": 0.4178, "step": 5100, "task_loss": 0.717585563659668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5973784327507019, "epoch": 4.31, "learning_rate": 2.844040574809806e-05, "loss": 0.572, "step": 5101, "task_loss": 1.197821021080017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6897644996643066, "epoch": 4.31, "learning_rate": 2.8436179205409975e-05, "loss": 0.4778, "step": 5102, "task_loss": 0.3001370131969452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5698708295822144, "epoch": 4.31, "learning_rate": 2.8431952662721895e-05, "loss": 0.6569, "step": 5103, "task_loss": 1.7258988618850708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3880610764026642, "epoch": 4.31, "learning_rate": 2.8427726120033815e-05, "loss": 0.5722, "step": 5104, "task_loss": 0.6297459006309509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4154919981956482, "epoch": 4.32, "learning_rate": 2.842349957734573e-05, "loss": 0.4683, "step": 5105, "task_loss": 0.7200974822044373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6565115451812744, "epoch": 4.32, "learning_rate": 2.841927303465765e-05, "loss": 0.4753, "step": 5106, "task_loss": 1.7250747680664062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5814658999443054, "epoch": 4.32, "learning_rate": 2.841504649196957e-05, "loss": 0.5159, "step": 5107, "task_loss": 0.7972137928009033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27703380584716797, "epoch": 4.32, "learning_rate": 2.8410819949281487e-05, "loss": 0.4695, "step": 5108, "task_loss": 0.8439851999282837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6899734139442444, "epoch": 4.32, "learning_rate": 2.8406593406593407e-05, "loss": 0.5645, "step": 5109, "task_loss": 0.6597148776054382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0467429161071777, "epoch": 4.32, "learning_rate": 2.8402366863905327e-05, "loss": 0.6363, "step": 5110, "task_loss": 0.989573061466217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5966413021087646, "epoch": 4.32, "learning_rate": 2.839814032121725e-05, "loss": 0.5452, "step": 5111, "task_loss": 1.4620239734649658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8440982103347778, "epoch": 4.32, "learning_rate": 2.8393913778529163e-05, "loss": 0.5259, "step": 5112, "task_loss": 0.7884932160377502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6587424278259277, "epoch": 4.32, "learning_rate": 2.8389687235841082e-05, "loss": 0.542, "step": 5113, "task_loss": 1.0504063367843628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8028830885887146, "epoch": 4.32, "learning_rate": 2.8385460693153005e-05, "loss": 0.5945, "step": 5114, "task_loss": 0.6810760498046875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.584349513053894, "epoch": 4.32, "learning_rate": 2.838123415046492e-05, "loss": 0.5065, "step": 5115, "task_loss": 0.28613045811653137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33873969316482544, "epoch": 4.32, "learning_rate": 2.8377007607776838e-05, "loss": 0.5025, "step": 5116, "task_loss": 0.44696739315986633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3172038197517395, "epoch": 4.33, "learning_rate": 2.837278106508876e-05, "loss": 0.3665, "step": 5117, "task_loss": 0.30419179797172546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3495364785194397, "epoch": 4.33, "learning_rate": 2.8368554522400674e-05, "loss": 0.4374, "step": 5118, "task_loss": 0.423458993434906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7016594409942627, "epoch": 4.33, "learning_rate": 2.8364327979712597e-05, "loss": 0.6444, "step": 5119, "task_loss": 0.25258922576904297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5477955341339111, "epoch": 4.33, "learning_rate": 2.8360101437024517e-05, "loss": 0.5276, "step": 5120, "task_loss": 0.6664541959762573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44633913040161133, "epoch": 4.33, "learning_rate": 2.835587489433643e-05, "loss": 0.4672, "step": 5121, "task_loss": 0.6767635941505432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6306418776512146, "epoch": 4.33, "learning_rate": 2.8351648351648353e-05, "loss": 0.5734, "step": 5122, "task_loss": 1.5109035968780518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8460204601287842, "epoch": 4.33, "learning_rate": 2.8347421808960273e-05, "loss": 0.5884, "step": 5123, "task_loss": 0.9197711944580078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5007159113883972, "epoch": 4.33, "learning_rate": 2.8343195266272193e-05, "loss": 0.5058, "step": 5124, "task_loss": 0.49399128556251526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6091967821121216, "epoch": 4.33, "learning_rate": 2.833896872358411e-05, "loss": 0.4897, "step": 5125, "task_loss": 0.7462937235832214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42689502239227295, "epoch": 4.33, "learning_rate": 2.833474218089603e-05, "loss": 0.47, "step": 5126, "task_loss": 1.3030518293380737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4012297987937927, "epoch": 4.33, "learning_rate": 2.833051563820795e-05, "loss": 0.6271, "step": 5127, "task_loss": 0.8749964833259583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5435868501663208, "epoch": 4.33, "learning_rate": 2.8326289095519865e-05, "loss": 0.5572, "step": 5128, "task_loss": 1.041831135749817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1026819944381714, "epoch": 4.34, "learning_rate": 2.8322062552831785e-05, "loss": 0.7004, "step": 5129, "task_loss": 0.6742920875549316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5178220272064209, "epoch": 4.34, "learning_rate": 2.8317836010143704e-05, "loss": 0.5686, "step": 5130, "task_loss": 0.737801730632782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2614779472351074, "epoch": 4.34, "learning_rate": 2.831360946745562e-05, "loss": 0.454, "step": 5131, "task_loss": 0.785963237285614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6100143194198608, "epoch": 4.34, "learning_rate": 2.830938292476754e-05, "loss": 0.5511, "step": 5132, "task_loss": 1.0341075658798218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25425082445144653, "epoch": 4.34, "learning_rate": 2.830515638207946e-05, "loss": 0.4488, "step": 5133, "task_loss": 0.658420979976654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48537927865982056, "epoch": 4.34, "learning_rate": 2.8300929839391377e-05, "loss": 0.6504, "step": 5134, "task_loss": 0.6704983115196228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37334245443344116, "epoch": 4.34, "learning_rate": 2.8296703296703296e-05, "loss": 0.5203, "step": 5135, "task_loss": 0.3471969664096832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.300645649433136, "epoch": 4.34, "learning_rate": 2.829247675401522e-05, "loss": 0.3204, "step": 5136, "task_loss": 0.061320386826992035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3374484181404114, "epoch": 4.34, "learning_rate": 2.8288250211327132e-05, "loss": 0.4713, "step": 5137, "task_loss": 0.4944455027580261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5325794816017151, "epoch": 4.34, "learning_rate": 2.8284023668639052e-05, "loss": 0.506, "step": 5138, "task_loss": 0.6164921522140503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29305213689804077, "epoch": 4.34, "learning_rate": 2.8279797125950975e-05, "loss": 0.5322, "step": 5139, "task_loss": 0.2341928333044052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41257110238075256, "epoch": 4.34, "learning_rate": 2.8275570583262895e-05, "loss": 0.4207, "step": 5140, "task_loss": 0.22187206149101257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5428314805030823, "epoch": 4.35, "learning_rate": 2.827134404057481e-05, "loss": 0.5645, "step": 5141, "task_loss": 0.457040935754776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38220298290252686, "epoch": 4.35, "learning_rate": 2.826711749788673e-05, "loss": 0.4694, "step": 5142, "task_loss": 0.7440987825393677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2930101752281189, "epoch": 4.35, "learning_rate": 2.826289095519865e-05, "loss": 0.3929, "step": 5143, "task_loss": 0.4625621438026428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3664424419403076, "epoch": 4.35, "learning_rate": 2.8258664412510567e-05, "loss": 0.4099, "step": 5144, "task_loss": 0.12081511318683624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.378319650888443, "epoch": 4.35, "learning_rate": 2.8254437869822487e-05, "loss": 0.4599, "step": 5145, "task_loss": 0.8400179147720337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.004839301109314, "epoch": 4.35, "learning_rate": 2.8250211327134407e-05, "loss": 0.577, "step": 5146, "task_loss": 0.9192570447921753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2919743061065674, "epoch": 4.35, "learning_rate": 2.8245984784446323e-05, "loss": 0.4254, "step": 5147, "task_loss": 0.27241548895835876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3417454659938812, "epoch": 4.35, "learning_rate": 2.8241758241758243e-05, "loss": 0.589, "step": 5148, "task_loss": 0.3698180019855499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6555811762809753, "epoch": 4.35, "learning_rate": 2.8237531699070162e-05, "loss": 0.6556, "step": 5149, "task_loss": 1.003419041633606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4510529041290283, "epoch": 4.35, "learning_rate": 2.823330515638208e-05, "loss": 0.5821, "step": 5150, "task_loss": 0.924353301525116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5813985466957092, "epoch": 4.35, "learning_rate": 2.8229078613694e-05, "loss": 0.4638, "step": 5151, "task_loss": 0.7580537796020508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7579513788223267, "epoch": 4.35, "learning_rate": 2.8224852071005918e-05, "loss": 0.6081, "step": 5152, "task_loss": 1.168509840965271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6822163462638855, "epoch": 4.36, "learning_rate": 2.822062552831784e-05, "loss": 0.5068, "step": 5153, "task_loss": 0.9986529350280762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42662423849105835, "epoch": 4.36, "learning_rate": 2.8216398985629754e-05, "loss": 0.4762, "step": 5154, "task_loss": 0.916824221611023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.895922064781189, "epoch": 4.36, "learning_rate": 2.8212172442941674e-05, "loss": 0.6484, "step": 5155, "task_loss": 0.5292621850967407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25648730993270874, "epoch": 4.36, "learning_rate": 2.8207945900253597e-05, "loss": 0.6055, "step": 5156, "task_loss": 0.9580972194671631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.676588237285614, "epoch": 4.36, "learning_rate": 2.820371935756551e-05, "loss": 0.6144, "step": 5157, "task_loss": 1.0475866794586182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28063124418258667, "epoch": 4.36, "learning_rate": 2.8199492814877433e-05, "loss": 0.4578, "step": 5158, "task_loss": 0.45988017320632935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45851457118988037, "epoch": 4.36, "learning_rate": 2.8195266272189353e-05, "loss": 0.4849, "step": 5159, "task_loss": 0.4729257822036743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49214595556259155, "epoch": 4.36, "learning_rate": 2.8191039729501266e-05, "loss": 0.6618, "step": 5160, "task_loss": 0.5544630289077759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6935632228851318, "epoch": 4.36, "learning_rate": 2.818681318681319e-05, "loss": 0.634, "step": 5161, "task_loss": 0.6638074517250061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30302244424819946, "epoch": 4.36, "learning_rate": 2.818258664412511e-05, "loss": 0.4642, "step": 5162, "task_loss": 0.034671150147914886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2157883197069168, "epoch": 4.36, "learning_rate": 2.8178360101437022e-05, "loss": 0.5834, "step": 5163, "task_loss": 0.12452114373445511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7114389538764954, "epoch": 4.36, "learning_rate": 2.8174133558748945e-05, "loss": 0.7532, "step": 5164, "task_loss": 0.911513090133667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6030280590057373, "epoch": 4.37, "learning_rate": 2.8169907016060865e-05, "loss": 0.6124, "step": 5165, "task_loss": 0.8780170679092407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5960670709609985, "epoch": 4.37, "learning_rate": 2.816568047337278e-05, "loss": 0.5157, "step": 5166, "task_loss": 1.470974087715149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6917482018470764, "epoch": 4.37, "learning_rate": 2.81614539306847e-05, "loss": 0.7225, "step": 5167, "task_loss": 1.1586589813232422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31945890188217163, "epoch": 4.37, "learning_rate": 2.815722738799662e-05, "loss": 0.4557, "step": 5168, "task_loss": 0.4502628445625305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44876235723495483, "epoch": 4.37, "learning_rate": 2.815300084530854e-05, "loss": 0.353, "step": 5169, "task_loss": 0.5005380511283875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6036719083786011, "epoch": 4.37, "learning_rate": 2.8148774302620457e-05, "loss": 0.4186, "step": 5170, "task_loss": 0.5820677280426025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6830223798751831, "epoch": 4.37, "learning_rate": 2.8144547759932376e-05, "loss": 0.7023, "step": 5171, "task_loss": 1.3801106214523315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.549087405204773, "epoch": 4.37, "learning_rate": 2.8140321217244296e-05, "loss": 0.4464, "step": 5172, "task_loss": 0.7122361660003662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8788726329803467, "epoch": 4.37, "learning_rate": 2.8136094674556212e-05, "loss": 0.5827, "step": 5173, "task_loss": 0.529780924320221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3800903856754303, "epoch": 4.37, "learning_rate": 2.8131868131868132e-05, "loss": 0.4638, "step": 5174, "task_loss": 0.19307269155979156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7118173837661743, "epoch": 4.37, "learning_rate": 2.8127641589180055e-05, "loss": 0.5074, "step": 5175, "task_loss": 1.2091130018234253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5326511859893799, "epoch": 4.38, "learning_rate": 2.8123415046491968e-05, "loss": 0.4148, "step": 5176, "task_loss": 1.1868786811828613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8008134365081787, "epoch": 4.38, "learning_rate": 2.8119188503803888e-05, "loss": 0.6732, "step": 5177, "task_loss": 0.4487271308898926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9325487613677979, "epoch": 4.38, "learning_rate": 2.811496196111581e-05, "loss": 0.5885, "step": 5178, "task_loss": 0.8965787887573242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3975347876548767, "epoch": 4.38, "learning_rate": 2.8110735418427724e-05, "loss": 0.4624, "step": 5179, "task_loss": 0.4935968816280365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3524544835090637, "epoch": 4.38, "learning_rate": 2.8106508875739644e-05, "loss": 0.4805, "step": 5180, "task_loss": 0.6915262341499329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5573157668113708, "epoch": 4.38, "learning_rate": 2.8102282333051567e-05, "loss": 0.5414, "step": 5181, "task_loss": 0.3662712275981903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4952223300933838, "epoch": 4.38, "learning_rate": 2.8098055790363487e-05, "loss": 0.6287, "step": 5182, "task_loss": 1.260284185409546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7111854553222656, "epoch": 4.38, "learning_rate": 2.8093829247675403e-05, "loss": 0.6655, "step": 5183, "task_loss": 1.1439898014068604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9234825372695923, "epoch": 4.38, "learning_rate": 2.8089602704987323e-05, "loss": 0.7595, "step": 5184, "task_loss": 1.217672348022461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4007294178009033, "epoch": 4.38, "learning_rate": 2.8085376162299243e-05, "loss": 0.4409, "step": 5185, "task_loss": 0.34053730964660645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7860308885574341, "epoch": 4.38, "learning_rate": 2.808114961961116e-05, "loss": 0.4376, "step": 5186, "task_loss": 0.5460903644561768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3694063723087311, "epoch": 4.38, "learning_rate": 2.807692307692308e-05, "loss": 0.4302, "step": 5187, "task_loss": 0.81557297706604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38470977544784546, "epoch": 4.39, "learning_rate": 2.8072696534235e-05, "loss": 0.4431, "step": 5188, "task_loss": 0.4834229648113251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5069899559020996, "epoch": 4.39, "learning_rate": 2.8068469991546915e-05, "loss": 0.4367, "step": 5189, "task_loss": 0.0913282111287117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6348316669464111, "epoch": 4.39, "learning_rate": 2.8064243448858834e-05, "loss": 0.5962, "step": 5190, "task_loss": 0.70924311876297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3900063633918762, "epoch": 4.39, "learning_rate": 2.8060016906170754e-05, "loss": 0.3692, "step": 5191, "task_loss": 0.291110634803772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5717165470123291, "epoch": 4.39, "learning_rate": 2.805579036348267e-05, "loss": 0.5706, "step": 5192, "task_loss": 0.9611908197402954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5871396064758301, "epoch": 4.39, "learning_rate": 2.805156382079459e-05, "loss": 0.4889, "step": 5193, "task_loss": 0.9330399036407471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4547578692436218, "epoch": 4.39, "learning_rate": 2.804733727810651e-05, "loss": 0.5616, "step": 5194, "task_loss": 0.7625985145568848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2721131443977356, "epoch": 4.39, "learning_rate": 2.8043110735418426e-05, "loss": 0.442, "step": 5195, "task_loss": 0.1250012218952179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.747524619102478, "epoch": 4.39, "learning_rate": 2.8038884192730346e-05, "loss": 0.6382, "step": 5196, "task_loss": 0.3939744234085083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6001373529434204, "epoch": 4.39, "learning_rate": 2.8034657650042266e-05, "loss": 0.4933, "step": 5197, "task_loss": 0.6505735516548157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7749235033988953, "epoch": 4.39, "learning_rate": 2.803043110735419e-05, "loss": 0.7562, "step": 5198, "task_loss": 1.0037953853607178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4544030725955963, "epoch": 4.39, "learning_rate": 2.8026204564666102e-05, "loss": 0.5222, "step": 5199, "task_loss": 1.7628355026245117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4277747571468353, "epoch": 4.4, "learning_rate": 2.8021978021978025e-05, "loss": 0.4876, "step": 5200, "task_loss": 0.40000858902931213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36930519342422485, "epoch": 4.4, "learning_rate": 2.8017751479289945e-05, "loss": 0.6174, "step": 5201, "task_loss": 1.259460687637329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5621569156646729, "epoch": 4.4, "learning_rate": 2.8013524936601858e-05, "loss": 0.5693, "step": 5202, "task_loss": 0.687770664691925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44103580713272095, "epoch": 4.4, "learning_rate": 2.800929839391378e-05, "loss": 0.4953, "step": 5203, "task_loss": 1.1542632579803467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5126107931137085, "epoch": 4.4, "learning_rate": 2.80050718512257e-05, "loss": 0.6372, "step": 5204, "task_loss": 0.7302573323249817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.457723468542099, "epoch": 4.4, "learning_rate": 2.8000845308537617e-05, "loss": 0.4851, "step": 5205, "task_loss": 0.6507857441902161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44336998462677, "epoch": 4.4, "learning_rate": 2.7996618765849537e-05, "loss": 0.5516, "step": 5206, "task_loss": 0.3427440822124481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5211141109466553, "epoch": 4.4, "learning_rate": 2.7992392223161456e-05, "loss": 0.585, "step": 5207, "task_loss": 0.43093645572662354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.62564617395401, "epoch": 4.4, "learning_rate": 2.7988165680473373e-05, "loss": 0.4695, "step": 5208, "task_loss": 1.0945377349853516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8042891025543213, "epoch": 4.4, "learning_rate": 2.7983939137785293e-05, "loss": 0.6085, "step": 5209, "task_loss": 0.5824911594390869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5441581010818481, "epoch": 4.4, "learning_rate": 2.7979712595097212e-05, "loss": 0.5682, "step": 5210, "task_loss": 0.9451205730438232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3167878985404968, "epoch": 4.4, "learning_rate": 2.7975486052409132e-05, "loss": 0.4606, "step": 5211, "task_loss": 0.356450617313385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6397414207458496, "epoch": 4.41, "learning_rate": 2.797125950972105e-05, "loss": 0.5088, "step": 5212, "task_loss": 1.4975876808166504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29197466373443604, "epoch": 4.41, "learning_rate": 2.7967032967032968e-05, "loss": 0.4009, "step": 5213, "task_loss": 0.5427900552749634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46088218688964844, "epoch": 4.41, "learning_rate": 2.7962806424344888e-05, "loss": 0.4312, "step": 5214, "task_loss": 0.38193684816360474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6076503396034241, "epoch": 4.41, "learning_rate": 2.7958579881656804e-05, "loss": 0.5127, "step": 5215, "task_loss": 0.8249279260635376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.467257559299469, "epoch": 4.41, "learning_rate": 2.7954353338968724e-05, "loss": 0.5268, "step": 5216, "task_loss": 0.9494364261627197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9039942026138306, "epoch": 4.41, "learning_rate": 2.7950126796280647e-05, "loss": 0.6162, "step": 5217, "task_loss": 2.245042085647583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.616887092590332, "epoch": 4.41, "learning_rate": 2.794590025359256e-05, "loss": 0.5327, "step": 5218, "task_loss": 0.709876298904419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6708094477653503, "epoch": 4.41, "learning_rate": 2.794167371090448e-05, "loss": 0.5921, "step": 5219, "task_loss": 0.3885692059993744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4749789237976074, "epoch": 4.41, "learning_rate": 2.7937447168216403e-05, "loss": 0.5761, "step": 5220, "task_loss": 0.6657482981681824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4687151312828064, "epoch": 4.41, "learning_rate": 2.7933220625528316e-05, "loss": 0.4517, "step": 5221, "task_loss": 1.0021352767944336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4453340768814087, "epoch": 4.41, "learning_rate": 2.792899408284024e-05, "loss": 0.4861, "step": 5222, "task_loss": 0.3758726119995117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32083624601364136, "epoch": 4.41, "learning_rate": 2.792476754015216e-05, "loss": 0.6274, "step": 5223, "task_loss": 0.9310301542282104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46359506249427795, "epoch": 4.42, "learning_rate": 2.792054099746407e-05, "loss": 0.5768, "step": 5224, "task_loss": 0.8322440981864929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6924983859062195, "epoch": 4.42, "learning_rate": 2.7916314454775995e-05, "loss": 0.4488, "step": 5225, "task_loss": 0.1947295069694519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5955932140350342, "epoch": 4.42, "learning_rate": 2.7912087912087915e-05, "loss": 0.5219, "step": 5226, "task_loss": 0.7560649514198303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4445628821849823, "epoch": 4.42, "learning_rate": 2.7907861369399834e-05, "loss": 0.4714, "step": 5227, "task_loss": 0.8212294578552246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44620150327682495, "epoch": 4.42, "learning_rate": 2.790363482671175e-05, "loss": 0.4739, "step": 5228, "task_loss": 0.582520067691803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33446359634399414, "epoch": 4.42, "learning_rate": 2.789940828402367e-05, "loss": 0.4245, "step": 5229, "task_loss": 0.40171048045158386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6825902462005615, "epoch": 4.42, "learning_rate": 2.789518174133559e-05, "loss": 0.5452, "step": 5230, "task_loss": 0.5209140777587891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4893573820590973, "epoch": 4.42, "learning_rate": 2.7890955198647506e-05, "loss": 0.6725, "step": 5231, "task_loss": 0.9621965289115906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25065481662750244, "epoch": 4.42, "learning_rate": 2.7886728655959426e-05, "loss": 0.4579, "step": 5232, "task_loss": 0.46341046690940857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48631200194358826, "epoch": 4.42, "learning_rate": 2.7882502113271346e-05, "loss": 0.5992, "step": 5233, "task_loss": 1.2627606391906738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5348921418190002, "epoch": 4.42, "learning_rate": 2.7878275570583262e-05, "loss": 0.58, "step": 5234, "task_loss": 0.4680885672569275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5864924788475037, "epoch": 4.42, "learning_rate": 2.7874049027895182e-05, "loss": 0.6615, "step": 5235, "task_loss": 1.0830177068710327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4064631164073944, "epoch": 4.43, "learning_rate": 2.7869822485207102e-05, "loss": 0.7178, "step": 5236, "task_loss": 1.1240168809890747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39533764123916626, "epoch": 4.43, "learning_rate": 2.7865595942519018e-05, "loss": 0.4301, "step": 5237, "task_loss": 0.17977426946163177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4698025584220886, "epoch": 4.43, "learning_rate": 2.7861369399830938e-05, "loss": 0.546, "step": 5238, "task_loss": 0.7148550152778625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7171174883842468, "epoch": 4.43, "learning_rate": 2.785714285714286e-05, "loss": 0.5485, "step": 5239, "task_loss": 0.7230456471443176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4743465185165405, "epoch": 4.43, "learning_rate": 2.785291631445478e-05, "loss": 0.477, "step": 5240, "task_loss": 0.6340458989143372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6087259650230408, "epoch": 4.43, "learning_rate": 2.7848689771766694e-05, "loss": 0.5505, "step": 5241, "task_loss": 0.8480997681617737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9905765652656555, "epoch": 4.43, "learning_rate": 2.7844463229078617e-05, "loss": 0.6051, "step": 5242, "task_loss": 0.7984235286712646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35354849696159363, "epoch": 4.43, "learning_rate": 2.7840236686390537e-05, "loss": 0.6114, "step": 5243, "task_loss": 0.1469717025756836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.547687292098999, "epoch": 4.43, "learning_rate": 2.783601014370245e-05, "loss": 0.5184, "step": 5244, "task_loss": 1.2660895586013794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.717499852180481, "epoch": 4.43, "learning_rate": 2.7831783601014373e-05, "loss": 0.6051, "step": 5245, "task_loss": 0.6356750726699829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4846644997596741, "epoch": 4.43, "learning_rate": 2.7827557058326292e-05, "loss": 0.4907, "step": 5246, "task_loss": 0.9908105731010437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.59178227186203, "epoch": 4.44, "learning_rate": 2.782333051563821e-05, "loss": 0.604, "step": 5247, "task_loss": 0.43177640438079834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6043274998664856, "epoch": 4.44, "learning_rate": 2.781910397295013e-05, "loss": 0.6397, "step": 5248, "task_loss": 0.39989933371543884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.667040228843689, "epoch": 4.44, "learning_rate": 2.7814877430262048e-05, "loss": 0.6386, "step": 5249, "task_loss": 1.2049132585525513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8686316609382629, "epoch": 4.44, "learning_rate": 2.7810650887573965e-05, "loss": 0.7103, "step": 5250, "task_loss": 0.2922057807445526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8328902721405029, "epoch": 4.44, "learning_rate": 2.7806424344885884e-05, "loss": 0.5476, "step": 5251, "task_loss": 0.49473509192466736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32229751348495483, "epoch": 4.44, "learning_rate": 2.7802197802197804e-05, "loss": 0.4756, "step": 5252, "task_loss": 1.0162307024002075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5075170993804932, "epoch": 4.44, "learning_rate": 2.779797125950972e-05, "loss": 0.5952, "step": 5253, "task_loss": 0.9417028427124023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48733970522880554, "epoch": 4.44, "learning_rate": 2.779374471682164e-05, "loss": 0.5232, "step": 5254, "task_loss": 0.5937368869781494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6276860237121582, "epoch": 4.44, "learning_rate": 2.778951817413356e-05, "loss": 0.4717, "step": 5255, "task_loss": 1.359994649887085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2065477967262268, "epoch": 4.44, "learning_rate": 2.7785291631445483e-05, "loss": 0.4319, "step": 5256, "task_loss": 0.3590351939201355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3995498716831207, "epoch": 4.44, "learning_rate": 2.7781065088757396e-05, "loss": 0.4323, "step": 5257, "task_loss": 0.4106886684894562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33497872948646545, "epoch": 4.44, "learning_rate": 2.7776838546069316e-05, "loss": 0.3968, "step": 5258, "task_loss": 0.6700830459594727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45254287123680115, "epoch": 4.45, "learning_rate": 2.777261200338124e-05, "loss": 0.5677, "step": 5259, "task_loss": 0.5990794897079468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8443390130996704, "epoch": 4.45, "learning_rate": 2.7768385460693152e-05, "loss": 0.5287, "step": 5260, "task_loss": 0.4160047173500061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8077751994132996, "epoch": 4.45, "learning_rate": 2.776415891800507e-05, "loss": 0.5666, "step": 5261, "task_loss": 0.8264917135238647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5909587144851685, "epoch": 4.45, "learning_rate": 2.7759932375316995e-05, "loss": 0.6071, "step": 5262, "task_loss": 0.17001473903656006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21351748704910278, "epoch": 4.45, "learning_rate": 2.7755705832628908e-05, "loss": 0.4412, "step": 5263, "task_loss": 0.4138297736644745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27882158756256104, "epoch": 4.45, "learning_rate": 2.775147928994083e-05, "loss": 0.4527, "step": 5264, "task_loss": 0.19578656554222107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9294151067733765, "epoch": 4.45, "learning_rate": 2.774725274725275e-05, "loss": 0.5838, "step": 5265, "task_loss": 0.5137839913368225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8721062541007996, "epoch": 4.45, "learning_rate": 2.7743026204564663e-05, "loss": 0.745, "step": 5266, "task_loss": 1.0089004039764404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7218141555786133, "epoch": 4.45, "learning_rate": 2.7738799661876587e-05, "loss": 0.5768, "step": 5267, "task_loss": 1.8011058568954468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5719237327575684, "epoch": 4.45, "learning_rate": 2.7734573119188506e-05, "loss": 0.4491, "step": 5268, "task_loss": 0.2796790897846222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.443045973777771, "epoch": 4.45, "learning_rate": 2.7730346576500426e-05, "loss": 0.4433, "step": 5269, "task_loss": 0.3776971101760864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5205014944076538, "epoch": 4.45, "learning_rate": 2.7726120033812342e-05, "loss": 0.5577, "step": 5270, "task_loss": 1.1317591667175293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4009777307510376, "epoch": 4.46, "learning_rate": 2.7721893491124262e-05, "loss": 0.4991, "step": 5271, "task_loss": 0.39043381810188293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4715713560581207, "epoch": 4.46, "learning_rate": 2.7717666948436182e-05, "loss": 0.4602, "step": 5272, "task_loss": 0.5938316583633423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8763389587402344, "epoch": 4.46, "learning_rate": 2.7713440405748098e-05, "loss": 0.5428, "step": 5273, "task_loss": 0.6430569887161255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48041558265686035, "epoch": 4.46, "learning_rate": 2.7709213863060018e-05, "loss": 0.5199, "step": 5274, "task_loss": 0.4133508801460266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3968963623046875, "epoch": 4.46, "learning_rate": 2.7704987320371938e-05, "loss": 0.4196, "step": 5275, "task_loss": 0.39113008975982666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5474100112915039, "epoch": 4.46, "learning_rate": 2.7700760777683854e-05, "loss": 0.7283, "step": 5276, "task_loss": 1.3075909614562988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6922458410263062, "epoch": 4.46, "learning_rate": 2.7696534234995774e-05, "loss": 0.4851, "step": 5277, "task_loss": 0.9054099917411804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29515084624290466, "epoch": 4.46, "learning_rate": 2.7692307692307694e-05, "loss": 0.4854, "step": 5278, "task_loss": 0.2939417362213135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5836045145988464, "epoch": 4.46, "learning_rate": 2.768808114961961e-05, "loss": 0.5121, "step": 5279, "task_loss": 0.8165667057037354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43753018975257874, "epoch": 4.46, "learning_rate": 2.768385460693153e-05, "loss": 0.5125, "step": 5280, "task_loss": 1.2097663879394531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42646661400794983, "epoch": 4.46, "learning_rate": 2.7679628064243453e-05, "loss": 0.5244, "step": 5281, "task_loss": 0.4319024085998535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5711855292320251, "epoch": 4.46, "learning_rate": 2.7675401521555366e-05, "loss": 0.4711, "step": 5282, "task_loss": 1.2479275465011597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4802483916282654, "epoch": 4.47, "learning_rate": 2.7671174978867285e-05, "loss": 0.5311, "step": 5283, "task_loss": 0.720216691493988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4605392813682556, "epoch": 4.47, "learning_rate": 2.766694843617921e-05, "loss": 0.5406, "step": 5284, "task_loss": 0.6428165435791016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.451355904340744, "epoch": 4.47, "learning_rate": 2.766272189349113e-05, "loss": 0.5133, "step": 5285, "task_loss": 0.3209630846977234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5163174867630005, "epoch": 4.47, "learning_rate": 2.7658495350803045e-05, "loss": 0.5538, "step": 5286, "task_loss": 0.5297533273696899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40374863147735596, "epoch": 4.47, "learning_rate": 2.7654268808114964e-05, "loss": 0.5162, "step": 5287, "task_loss": 0.6043429374694824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33275893330574036, "epoch": 4.47, "learning_rate": 2.7650042265426884e-05, "loss": 0.5044, "step": 5288, "task_loss": 0.7751222848892212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46711021661758423, "epoch": 4.47, "learning_rate": 2.76458157227388e-05, "loss": 0.5104, "step": 5289, "task_loss": 0.5457462668418884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8899959921836853, "epoch": 4.47, "learning_rate": 2.764158918005072e-05, "loss": 0.6966, "step": 5290, "task_loss": 0.5391844511032104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6285883784294128, "epoch": 4.47, "learning_rate": 2.763736263736264e-05, "loss": 0.5784, "step": 5291, "task_loss": 0.7773985862731934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5362526774406433, "epoch": 4.47, "learning_rate": 2.7633136094674556e-05, "loss": 0.5492, "step": 5292, "task_loss": 0.3517150580883026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30984261631965637, "epoch": 4.47, "learning_rate": 2.7628909551986476e-05, "loss": 0.4233, "step": 5293, "task_loss": 0.30853402614593506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33329862356185913, "epoch": 4.47, "learning_rate": 2.7624683009298396e-05, "loss": 0.4847, "step": 5294, "task_loss": 0.32293060421943665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25270822644233704, "epoch": 4.48, "learning_rate": 2.7620456466610312e-05, "loss": 0.4156, "step": 5295, "task_loss": 0.6066348552703857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34133803844451904, "epoch": 4.48, "learning_rate": 2.7616229923922232e-05, "loss": 0.4313, "step": 5296, "task_loss": 1.1871721744537354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5936399102210999, "epoch": 4.48, "learning_rate": 2.761200338123415e-05, "loss": 0.4867, "step": 5297, "task_loss": 0.6456088423728943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7116214632987976, "epoch": 4.48, "learning_rate": 2.7607776838546075e-05, "loss": 0.8233, "step": 5298, "task_loss": 1.1166901588439941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8979636430740356, "epoch": 4.48, "learning_rate": 2.7603550295857988e-05, "loss": 0.6894, "step": 5299, "task_loss": 0.752702534198761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2484954297542572, "epoch": 4.48, "learning_rate": 2.7599323753169907e-05, "loss": 0.3845, "step": 5300, "task_loss": 0.35879749059677124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5948817729949951, "epoch": 4.48, "learning_rate": 2.759509721048183e-05, "loss": 0.6222, "step": 5301, "task_loss": 2.029428482055664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7001022100448608, "epoch": 4.48, "learning_rate": 2.7590870667793744e-05, "loss": 0.602, "step": 5302, "task_loss": 0.42063450813293457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47895729541778564, "epoch": 4.48, "learning_rate": 2.7586644125105667e-05, "loss": 0.4279, "step": 5303, "task_loss": 1.356555700302124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9620168209075928, "epoch": 4.48, "learning_rate": 2.7582417582417586e-05, "loss": 0.9009, "step": 5304, "task_loss": 1.9546177387237549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42540958523750305, "epoch": 4.48, "learning_rate": 2.75781910397295e-05, "loss": 0.6444, "step": 5305, "task_loss": 0.9007807970046997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4218650162220001, "epoch": 4.48, "learning_rate": 2.7573964497041422e-05, "loss": 0.4138, "step": 5306, "task_loss": 0.2950070798397064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5379204750061035, "epoch": 4.49, "learning_rate": 2.7569737954353342e-05, "loss": 0.5827, "step": 5307, "task_loss": 0.5207168459892273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6428512930870056, "epoch": 4.49, "learning_rate": 2.7565511411665255e-05, "loss": 0.7337, "step": 5308, "task_loss": 0.8318116664886475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7405051589012146, "epoch": 4.49, "learning_rate": 2.756128486897718e-05, "loss": 0.4726, "step": 5309, "task_loss": 1.6033854484558105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3466702103614807, "epoch": 4.49, "learning_rate": 2.7557058326289098e-05, "loss": 0.5209, "step": 5310, "task_loss": 1.1743545532226562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37898385524749756, "epoch": 4.49, "learning_rate": 2.7552831783601014e-05, "loss": 0.5949, "step": 5311, "task_loss": 0.7249733805656433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36499926447868347, "epoch": 4.49, "learning_rate": 2.7548605240912934e-05, "loss": 0.497, "step": 5312, "task_loss": 0.38812413811683655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.256285697221756, "epoch": 4.49, "learning_rate": 2.7544378698224854e-05, "loss": 0.4844, "step": 5313, "task_loss": 0.4137265384197235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7692514657974243, "epoch": 4.49, "learning_rate": 2.7540152155536774e-05, "loss": 0.5162, "step": 5314, "task_loss": 1.0254405736923218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3826620876789093, "epoch": 4.49, "learning_rate": 2.753592561284869e-05, "loss": 0.6084, "step": 5315, "task_loss": 0.1097988486289978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48812592029571533, "epoch": 4.49, "learning_rate": 2.753169907016061e-05, "loss": 0.491, "step": 5316, "task_loss": 0.5517425537109375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32362592220306396, "epoch": 4.49, "learning_rate": 2.752747252747253e-05, "loss": 0.5018, "step": 5317, "task_loss": 0.23633944988250732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5260745286941528, "epoch": 4.5, "learning_rate": 2.7523245984784446e-05, "loss": 0.5458, "step": 5318, "task_loss": 0.2933235764503479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6433345079421997, "epoch": 4.5, "learning_rate": 2.7519019442096366e-05, "loss": 0.5014, "step": 5319, "task_loss": 0.7560244202613831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.53708815574646, "epoch": 4.5, "learning_rate": 2.751479289940829e-05, "loss": 0.5582, "step": 5320, "task_loss": 0.914886474609375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49981334805488586, "epoch": 4.5, "learning_rate": 2.75105663567202e-05, "loss": 0.5058, "step": 5321, "task_loss": 0.5407476425170898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4667494297027588, "epoch": 4.5, "learning_rate": 2.750633981403212e-05, "loss": 0.4379, "step": 5322, "task_loss": 0.37113475799560547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41166138648986816, "epoch": 4.5, "learning_rate": 2.7502113271344044e-05, "loss": 0.4221, "step": 5323, "task_loss": 0.23486945033073425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7390798330307007, "epoch": 4.5, "learning_rate": 2.7497886728655957e-05, "loss": 0.5183, "step": 5324, "task_loss": 0.24238334596157074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6582691073417664, "epoch": 4.5, "learning_rate": 2.7493660185967877e-05, "loss": 0.5836, "step": 5325, "task_loss": 0.4756070375442505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3439198136329651, "epoch": 4.5, "learning_rate": 2.74894336432798e-05, "loss": 0.663, "step": 5326, "task_loss": 0.4897440969944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7269107699394226, "epoch": 4.5, "learning_rate": 2.748520710059172e-05, "loss": 0.4789, "step": 5327, "task_loss": 0.5211256146430969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5550800561904907, "epoch": 4.5, "learning_rate": 2.7480980557903636e-05, "loss": 0.4562, "step": 5328, "task_loss": 0.6577306389808655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43927112221717834, "epoch": 4.5, "learning_rate": 2.7476754015215556e-05, "loss": 0.4786, "step": 5329, "task_loss": 0.743955671787262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20793700218200684, "epoch": 4.51, "learning_rate": 2.7472527472527476e-05, "loss": 0.371, "step": 5330, "task_loss": 0.0683882087469101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5816153287887573, "epoch": 4.51, "learning_rate": 2.7468300929839392e-05, "loss": 0.5147, "step": 5331, "task_loss": 0.9620684385299683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36975929141044617, "epoch": 4.51, "learning_rate": 2.7464074387151312e-05, "loss": 0.4648, "step": 5332, "task_loss": 0.9288278818130493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40742093324661255, "epoch": 4.51, "learning_rate": 2.7459847844463232e-05, "loss": 0.5185, "step": 5333, "task_loss": 0.4179134964942932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5236296653747559, "epoch": 4.51, "learning_rate": 2.7455621301775148e-05, "loss": 0.741, "step": 5334, "task_loss": 1.2751891613006592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6133717894554138, "epoch": 4.51, "learning_rate": 2.7451394759087068e-05, "loss": 0.5611, "step": 5335, "task_loss": 1.061793565750122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5870427489280701, "epoch": 4.51, "learning_rate": 2.7447168216398988e-05, "loss": 0.4618, "step": 5336, "task_loss": 0.757529616355896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35291415452957153, "epoch": 4.51, "learning_rate": 2.7442941673710904e-05, "loss": 0.4923, "step": 5337, "task_loss": 0.08009390532970428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6521784067153931, "epoch": 4.51, "learning_rate": 2.7438715131022824e-05, "loss": 0.736, "step": 5338, "task_loss": 1.6338598728179932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46560460329055786, "epoch": 4.51, "learning_rate": 2.7434488588334743e-05, "loss": 0.3905, "step": 5339, "task_loss": 0.8386006951332092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5951083302497864, "epoch": 4.51, "learning_rate": 2.743026204564666e-05, "loss": 0.5619, "step": 5340, "task_loss": 1.3118579387664795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4544222354888916, "epoch": 4.51, "learning_rate": 2.742603550295858e-05, "loss": 0.5228, "step": 5341, "task_loss": 0.3645521402359009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4155057668685913, "epoch": 4.52, "learning_rate": 2.74218089602705e-05, "loss": 0.5417, "step": 5342, "task_loss": 0.1335374116897583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37532559037208557, "epoch": 4.52, "learning_rate": 2.7417582417582422e-05, "loss": 0.6281, "step": 5343, "task_loss": 0.048613499850034714 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.755224347114563, "epoch": 4.52, "learning_rate": 2.7413355874894335e-05, "loss": 0.6334, "step": 5344, "task_loss": 0.8768181204795837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39834776520729065, "epoch": 4.52, "learning_rate": 2.740912933220626e-05, "loss": 0.4996, "step": 5345, "task_loss": 0.4868217706680298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26929712295532227, "epoch": 4.52, "learning_rate": 2.7404902789518178e-05, "loss": 0.6149, "step": 5346, "task_loss": 0.2461363524198532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6296822428703308, "epoch": 4.52, "learning_rate": 2.740067624683009e-05, "loss": 0.6245, "step": 5347, "task_loss": 1.5669865608215332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3427096903324127, "epoch": 4.52, "learning_rate": 2.7396449704142014e-05, "loss": 0.4173, "step": 5348, "task_loss": 0.5462246537208557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3833007514476776, "epoch": 4.52, "learning_rate": 2.7392223161453934e-05, "loss": 0.5529, "step": 5349, "task_loss": 0.740264356136322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48672664165496826, "epoch": 4.52, "learning_rate": 2.738799661876585e-05, "loss": 0.5163, "step": 5350, "task_loss": 1.0009360313415527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4030136466026306, "epoch": 4.52, "learning_rate": 2.738377007607777e-05, "loss": 0.5861, "step": 5351, "task_loss": 0.3999110460281372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2966085374355316, "epoch": 4.52, "learning_rate": 2.737954353338969e-05, "loss": 0.5322, "step": 5352, "task_loss": 0.07671620696783066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7703042030334473, "epoch": 4.52, "learning_rate": 2.7375316990701606e-05, "loss": 0.5746, "step": 5353, "task_loss": 0.9699040651321411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5411189794540405, "epoch": 4.53, "learning_rate": 2.7371090448013526e-05, "loss": 0.4717, "step": 5354, "task_loss": 1.0553245544433594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4826720356941223, "epoch": 4.53, "learning_rate": 2.7366863905325446e-05, "loss": 0.5296, "step": 5355, "task_loss": 0.699677050113678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9032210111618042, "epoch": 4.53, "learning_rate": 2.7362637362637365e-05, "loss": 0.5969, "step": 5356, "task_loss": 1.0561261177062988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43842482566833496, "epoch": 4.53, "learning_rate": 2.7358410819949282e-05, "loss": 0.4893, "step": 5357, "task_loss": 0.14813998341560364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7673283219337463, "epoch": 4.53, "learning_rate": 2.73541842772612e-05, "loss": 0.606, "step": 5358, "task_loss": 0.9109212160110474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7322139739990234, "epoch": 4.53, "learning_rate": 2.734995773457312e-05, "loss": 0.5907, "step": 5359, "task_loss": 1.4554837942123413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.861885666847229, "epoch": 4.53, "learning_rate": 2.7345731191885038e-05, "loss": 0.6039, "step": 5360, "task_loss": 1.3273420333862305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40028080344200134, "epoch": 4.53, "learning_rate": 2.7341504649196957e-05, "loss": 0.5511, "step": 5361, "task_loss": 0.21868599951267242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7488853931427002, "epoch": 4.53, "learning_rate": 2.733727810650888e-05, "loss": 0.7021, "step": 5362, "task_loss": 0.641872227191925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6559021472930908, "epoch": 4.53, "learning_rate": 2.7333051563820793e-05, "loss": 0.6907, "step": 5363, "task_loss": 1.3550121784210205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48939910531044006, "epoch": 4.53, "learning_rate": 2.7328825021132713e-05, "loss": 0.5435, "step": 5364, "task_loss": 1.0295660495758057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5626007914543152, "epoch": 4.53, "learning_rate": 2.7324598478444636e-05, "loss": 0.4786, "step": 5365, "task_loss": 0.550451934337616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5613609552383423, "epoch": 4.54, "learning_rate": 2.732037193575655e-05, "loss": 0.4789, "step": 5366, "task_loss": 0.808321475982666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4478108286857605, "epoch": 4.54, "learning_rate": 2.7316145393068472e-05, "loss": 0.4906, "step": 5367, "task_loss": 0.33435213565826416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5399828553199768, "epoch": 4.54, "learning_rate": 2.7311918850380392e-05, "loss": 0.4696, "step": 5368, "task_loss": 0.4496751129627228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41021180152893066, "epoch": 4.54, "learning_rate": 2.7307692307692305e-05, "loss": 0.6109, "step": 5369, "task_loss": 0.4057617485523224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3225499391555786, "epoch": 4.54, "learning_rate": 2.7303465765004228e-05, "loss": 0.4364, "step": 5370, "task_loss": 0.618878185749054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4486207365989685, "epoch": 4.54, "learning_rate": 2.7299239222316148e-05, "loss": 0.4617, "step": 5371, "task_loss": 0.4366832971572876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2765759825706482, "epoch": 4.54, "learning_rate": 2.7295012679628068e-05, "loss": 0.3803, "step": 5372, "task_loss": 0.42383480072021484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6151210069656372, "epoch": 4.54, "learning_rate": 2.7290786136939984e-05, "loss": 0.4874, "step": 5373, "task_loss": 1.1104873418807983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4537014663219452, "epoch": 4.54, "learning_rate": 2.7286559594251904e-05, "loss": 0.4734, "step": 5374, "task_loss": 0.4669799506664276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6336425542831421, "epoch": 4.54, "learning_rate": 2.7282333051563823e-05, "loss": 0.6792, "step": 5375, "task_loss": 0.23321525752544403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41254305839538574, "epoch": 4.54, "learning_rate": 2.727810650887574e-05, "loss": 0.5366, "step": 5376, "task_loss": 0.2227546125650406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5170974135398865, "epoch": 4.54, "learning_rate": 2.727387996618766e-05, "loss": 0.4602, "step": 5377, "task_loss": 0.4968531131744385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7210485935211182, "epoch": 4.55, "learning_rate": 2.726965342349958e-05, "loss": 0.444, "step": 5378, "task_loss": 0.5338968634605408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6378473043441772, "epoch": 4.55, "learning_rate": 2.7265426880811496e-05, "loss": 0.6686, "step": 5379, "task_loss": 0.381631463766098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4934467077255249, "epoch": 4.55, "learning_rate": 2.7261200338123415e-05, "loss": 0.4727, "step": 5380, "task_loss": 0.5844487547874451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7077147960662842, "epoch": 4.55, "learning_rate": 2.7256973795435335e-05, "loss": 0.6066, "step": 5381, "task_loss": 0.6631092429161072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4296506643295288, "epoch": 4.55, "learning_rate": 2.725274725274725e-05, "loss": 0.4946, "step": 5382, "task_loss": 0.6394067406654358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.596145510673523, "epoch": 4.55, "learning_rate": 2.724852071005917e-05, "loss": 0.5587, "step": 5383, "task_loss": 0.9702220559120178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5406750440597534, "epoch": 4.55, "learning_rate": 2.7244294167371094e-05, "loss": 0.5037, "step": 5384, "task_loss": 0.48072123527526855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49670472741127014, "epoch": 4.55, "learning_rate": 2.7240067624683014e-05, "loss": 0.5095, "step": 5385, "task_loss": 1.3811817169189453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3292773962020874, "epoch": 4.55, "learning_rate": 2.7235841081994927e-05, "loss": 0.5871, "step": 5386, "task_loss": 1.177852988243103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33358633518218994, "epoch": 4.55, "learning_rate": 2.723161453930685e-05, "loss": 0.7863, "step": 5387, "task_loss": 1.5302704572677612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3668261170387268, "epoch": 4.55, "learning_rate": 2.722738799661877e-05, "loss": 0.6719, "step": 5388, "task_loss": 0.8525644540786743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7261632680892944, "epoch": 4.56, "learning_rate": 2.7223161453930683e-05, "loss": 0.5054, "step": 5389, "task_loss": 1.2605348825454712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9297285079956055, "epoch": 4.56, "learning_rate": 2.7218934911242606e-05, "loss": 0.5816, "step": 5390, "task_loss": 2.27435040473938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7317273616790771, "epoch": 4.56, "learning_rate": 2.7214708368554526e-05, "loss": 0.5447, "step": 5391, "task_loss": 0.6920271515846252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4560929238796234, "epoch": 4.56, "learning_rate": 2.7210481825866442e-05, "loss": 0.4907, "step": 5392, "task_loss": 0.4183073043823242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2572377026081085, "epoch": 4.56, "learning_rate": 2.7206255283178362e-05, "loss": 0.5336, "step": 5393, "task_loss": 1.0202858448028564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6760679483413696, "epoch": 4.56, "learning_rate": 2.720202874049028e-05, "loss": 0.6966, "step": 5394, "task_loss": 0.5301749110221863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7935565710067749, "epoch": 4.56, "learning_rate": 2.7197802197802198e-05, "loss": 0.508, "step": 5395, "task_loss": 0.9583966135978699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4155125916004181, "epoch": 4.56, "learning_rate": 2.7193575655114118e-05, "loss": 0.527, "step": 5396, "task_loss": 0.5703842639923096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.596451461315155, "epoch": 4.56, "learning_rate": 2.7189349112426037e-05, "loss": 0.629, "step": 5397, "task_loss": 1.1101837158203125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6796680688858032, "epoch": 4.56, "learning_rate": 2.7185122569737954e-05, "loss": 0.6547, "step": 5398, "task_loss": 1.5831326246261597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2625083923339844, "epoch": 4.56, "learning_rate": 2.7180896027049873e-05, "loss": 0.4714, "step": 5399, "task_loss": 0.16241151094436646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22443480789661407, "epoch": 4.56, "learning_rate": 2.7176669484361793e-05, "loss": 0.4459, "step": 5400, "task_loss": 0.4830724895000458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6096628904342651, "epoch": 4.57, "learning_rate": 2.7172442941673716e-05, "loss": 0.524, "step": 5401, "task_loss": 0.3656918406486511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5304520726203918, "epoch": 4.57, "learning_rate": 2.716821639898563e-05, "loss": 0.5741, "step": 5402, "task_loss": 0.6311800479888916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7263672351837158, "epoch": 4.57, "learning_rate": 2.716398985629755e-05, "loss": 0.5292, "step": 5403, "task_loss": 0.5756078958511353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.412069171667099, "epoch": 4.57, "learning_rate": 2.7159763313609472e-05, "loss": 0.5627, "step": 5404, "task_loss": 0.8759124875068665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4952296018600464, "epoch": 4.57, "learning_rate": 2.7155536770921385e-05, "loss": 0.4858, "step": 5405, "task_loss": 0.047556180506944656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3848026394844055, "epoch": 4.57, "learning_rate": 2.7151310228233305e-05, "loss": 0.4534, "step": 5406, "task_loss": 1.5189660787582397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42357152700424194, "epoch": 4.57, "learning_rate": 2.7147083685545228e-05, "loss": 0.4804, "step": 5407, "task_loss": 1.2142369747161865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7135859727859497, "epoch": 4.57, "learning_rate": 2.714285714285714e-05, "loss": 0.633, "step": 5408, "task_loss": 0.8967861533164978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3272915184497833, "epoch": 4.57, "learning_rate": 2.7138630600169064e-05, "loss": 0.6056, "step": 5409, "task_loss": 0.2025403529405594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1530689299106598, "epoch": 4.57, "learning_rate": 2.7134404057480984e-05, "loss": 0.4313, "step": 5410, "task_loss": 0.26233577728271484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2940325438976288, "epoch": 4.57, "learning_rate": 2.7130177514792897e-05, "loss": 0.549, "step": 5411, "task_loss": 0.31544169783592224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4194158911705017, "epoch": 4.57, "learning_rate": 2.712595097210482e-05, "loss": 0.5072, "step": 5412, "task_loss": 1.0105589628219604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22945697605609894, "epoch": 4.58, "learning_rate": 2.712172442941674e-05, "loss": 0.4961, "step": 5413, "task_loss": 0.27859506011009216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48602455854415894, "epoch": 4.58, "learning_rate": 2.711749788672866e-05, "loss": 0.5022, "step": 5414, "task_loss": 0.3840574026107788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5287999510765076, "epoch": 4.58, "learning_rate": 2.7113271344040576e-05, "loss": 0.4271, "step": 5415, "task_loss": 0.3990764617919922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2725510597229004, "epoch": 4.58, "learning_rate": 2.7109044801352495e-05, "loss": 0.4651, "step": 5416, "task_loss": 0.3067554235458374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6128413081169128, "epoch": 4.58, "learning_rate": 2.7104818258664415e-05, "loss": 0.6796, "step": 5417, "task_loss": 0.5863294005393982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4496791660785675, "epoch": 4.58, "learning_rate": 2.710059171597633e-05, "loss": 0.598, "step": 5418, "task_loss": 0.6556788086891174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7718957662582397, "epoch": 4.58, "learning_rate": 2.709636517328825e-05, "loss": 0.5533, "step": 5419, "task_loss": 1.0133272409439087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5502604842185974, "epoch": 4.58, "learning_rate": 2.709213863060017e-05, "loss": 0.474, "step": 5420, "task_loss": 0.5062881112098694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6409262418746948, "epoch": 4.58, "learning_rate": 2.7087912087912087e-05, "loss": 0.5903, "step": 5421, "task_loss": 1.1804314851760864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4186840057373047, "epoch": 4.58, "learning_rate": 2.7083685545224007e-05, "loss": 0.5779, "step": 5422, "task_loss": 0.17031022906303406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20987167954444885, "epoch": 4.58, "learning_rate": 2.7079459002535927e-05, "loss": 0.4459, "step": 5423, "task_loss": 0.5038543939590454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8138952255249023, "epoch": 4.58, "learning_rate": 2.7075232459847843e-05, "loss": 0.6911, "step": 5424, "task_loss": 0.4157523810863495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4471595883369446, "epoch": 4.59, "learning_rate": 2.7071005917159763e-05, "loss": 0.5376, "step": 5425, "task_loss": 0.5205883979797363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2984365224838257, "epoch": 4.59, "learning_rate": 2.7066779374471686e-05, "loss": 0.3926, "step": 5426, "task_loss": 0.26941877603530884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5124970078468323, "epoch": 4.59, "learning_rate": 2.70625528317836e-05, "loss": 0.4878, "step": 5427, "task_loss": 1.5197886228561401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7513706684112549, "epoch": 4.59, "learning_rate": 2.705832628909552e-05, "loss": 0.5375, "step": 5428, "task_loss": 0.1762867122888565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47924187779426575, "epoch": 4.59, "learning_rate": 2.7054099746407442e-05, "loss": 0.5484, "step": 5429, "task_loss": 0.4806961715221405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5714477300643921, "epoch": 4.59, "learning_rate": 2.704987320371936e-05, "loss": 0.421, "step": 5430, "task_loss": 0.9996347427368164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46663421392440796, "epoch": 4.59, "learning_rate": 2.7045646661031278e-05, "loss": 0.4469, "step": 5431, "task_loss": 0.48486167192459106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7124150395393372, "epoch": 4.59, "learning_rate": 2.7041420118343198e-05, "loss": 0.4811, "step": 5432, "task_loss": 0.6984152793884277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3023144006729126, "epoch": 4.59, "learning_rate": 2.7037193575655117e-05, "loss": 0.6099, "step": 5433, "task_loss": 0.5038154125213623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4609799385070801, "epoch": 4.59, "learning_rate": 2.7032967032967034e-05, "loss": 0.6469, "step": 5434, "task_loss": 0.7658967971801758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9310113787651062, "epoch": 4.59, "learning_rate": 2.7028740490278954e-05, "loss": 0.5694, "step": 5435, "task_loss": 1.0580936670303345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4861798882484436, "epoch": 4.59, "learning_rate": 2.7024513947590873e-05, "loss": 0.55, "step": 5436, "task_loss": 1.2361242771148682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3817145824432373, "epoch": 4.6, "learning_rate": 2.702028740490279e-05, "loss": 0.3634, "step": 5437, "task_loss": 0.9464032053947449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6354399919509888, "epoch": 4.6, "learning_rate": 2.701606086221471e-05, "loss": 0.4569, "step": 5438, "task_loss": 0.7765830159187317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31423741579055786, "epoch": 4.6, "learning_rate": 2.701183431952663e-05, "loss": 0.462, "step": 5439, "task_loss": 0.7643583416938782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7644920349121094, "epoch": 4.6, "learning_rate": 2.7007607776838545e-05, "loss": 0.7352, "step": 5440, "task_loss": 0.6541059613227844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.624780535697937, "epoch": 4.6, "learning_rate": 2.7003381234150465e-05, "loss": 0.5336, "step": 5441, "task_loss": 0.9501602053642273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5203424096107483, "epoch": 4.6, "learning_rate": 2.6999154691462385e-05, "loss": 0.4896, "step": 5442, "task_loss": 0.8856216073036194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3527968227863312, "epoch": 4.6, "learning_rate": 2.6994928148774308e-05, "loss": 0.4545, "step": 5443, "task_loss": 0.1643494814634323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.355309247970581, "epoch": 4.6, "learning_rate": 2.699070160608622e-05, "loss": 0.8362, "step": 5444, "task_loss": 0.8305923938751221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3862878680229187, "epoch": 4.6, "learning_rate": 2.698647506339814e-05, "loss": 0.4791, "step": 5445, "task_loss": 0.22194339334964752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9239904880523682, "epoch": 4.6, "learning_rate": 2.6982248520710064e-05, "loss": 0.655, "step": 5446, "task_loss": 0.6507148146629333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4985984265804291, "epoch": 4.6, "learning_rate": 2.6978021978021977e-05, "loss": 0.6384, "step": 5447, "task_loss": 0.453459769487381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4208281338214874, "epoch": 4.6, "learning_rate": 2.69737954353339e-05, "loss": 0.5135, "step": 5448, "task_loss": 0.4005371332168579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7169245481491089, "epoch": 4.61, "learning_rate": 2.696956889264582e-05, "loss": 0.6347, "step": 5449, "task_loss": 0.8415381908416748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31406426429748535, "epoch": 4.61, "learning_rate": 2.6965342349957733e-05, "loss": 0.4154, "step": 5450, "task_loss": 0.44339028000831604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35602590441703796, "epoch": 4.61, "learning_rate": 2.6961115807269656e-05, "loss": 0.4337, "step": 5451, "task_loss": 0.7294882535934448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5621517896652222, "epoch": 4.61, "learning_rate": 2.6956889264581576e-05, "loss": 0.4816, "step": 5452, "task_loss": 0.8586968183517456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7009731531143188, "epoch": 4.61, "learning_rate": 2.695266272189349e-05, "loss": 0.6647, "step": 5453, "task_loss": 1.2892886400222778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6520194411277771, "epoch": 4.61, "learning_rate": 2.694843617920541e-05, "loss": 0.607, "step": 5454, "task_loss": 1.3366714715957642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4571188986301422, "epoch": 4.61, "learning_rate": 2.694420963651733e-05, "loss": 0.6976, "step": 5455, "task_loss": 1.25129234790802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22270163893699646, "epoch": 4.61, "learning_rate": 2.6939983093829248e-05, "loss": 0.5555, "step": 5456, "task_loss": 0.17719006538391113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5421428084373474, "epoch": 4.61, "learning_rate": 2.6935756551141167e-05, "loss": 0.4608, "step": 5457, "task_loss": 1.2978284358978271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29250138998031616, "epoch": 4.61, "learning_rate": 2.6931530008453087e-05, "loss": 0.4156, "step": 5458, "task_loss": 0.860486626625061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5911927223205566, "epoch": 4.61, "learning_rate": 2.6927303465765007e-05, "loss": 0.6204, "step": 5459, "task_loss": 1.335366129875183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39149215817451477, "epoch": 4.61, "learning_rate": 2.6923076923076923e-05, "loss": 0.5756, "step": 5460, "task_loss": 0.37639501690864563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2616904377937317, "epoch": 4.62, "learning_rate": 2.6918850380388843e-05, "loss": 0.4442, "step": 5461, "task_loss": 0.34931880235671997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37292516231536865, "epoch": 4.62, "learning_rate": 2.6914623837700763e-05, "loss": 0.6299, "step": 5462, "task_loss": 0.9685525298118591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6986571550369263, "epoch": 4.62, "learning_rate": 2.691039729501268e-05, "loss": 0.4735, "step": 5463, "task_loss": 1.4629547595977783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3897111415863037, "epoch": 4.62, "learning_rate": 2.69061707523246e-05, "loss": 0.4564, "step": 5464, "task_loss": 0.5439441204071045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24074490368366241, "epoch": 4.62, "learning_rate": 2.6901944209636522e-05, "loss": 0.5041, "step": 5465, "task_loss": 0.38880655169487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36179739236831665, "epoch": 4.62, "learning_rate": 2.6897717666948435e-05, "loss": 0.5335, "step": 5466, "task_loss": 0.8857415318489075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6761791706085205, "epoch": 4.62, "learning_rate": 2.6893491124260355e-05, "loss": 0.4016, "step": 5467, "task_loss": 0.8081907033920288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.654042661190033, "epoch": 4.62, "learning_rate": 2.6889264581572278e-05, "loss": 0.5364, "step": 5468, "task_loss": 0.15242618322372437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2569422721862793, "epoch": 4.62, "learning_rate": 2.688503803888419e-05, "loss": 0.4691, "step": 5469, "task_loss": 0.09678167849779129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45545610785484314, "epoch": 4.62, "learning_rate": 2.688081149619611e-05, "loss": 0.4192, "step": 5470, "task_loss": 0.7507992386817932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5276072025299072, "epoch": 4.62, "learning_rate": 2.6876584953508034e-05, "loss": 0.5169, "step": 5471, "task_loss": 0.18511104583740234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.566360354423523, "epoch": 4.63, "learning_rate": 2.6872358410819953e-05, "loss": 0.5259, "step": 5472, "task_loss": 0.8933319449424744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.195001482963562, "epoch": 4.63, "learning_rate": 2.686813186813187e-05, "loss": 0.6434, "step": 5473, "task_loss": 0.6887006759643555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5327467918395996, "epoch": 4.63, "learning_rate": 2.686390532544379e-05, "loss": 0.5658, "step": 5474, "task_loss": 0.6111761331558228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4131084978580475, "epoch": 4.63, "learning_rate": 2.685967878275571e-05, "loss": 0.4146, "step": 5475, "task_loss": 0.3020959794521332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29281583428382874, "epoch": 4.63, "learning_rate": 2.6855452240067626e-05, "loss": 0.6726, "step": 5476, "task_loss": 0.9623677730560303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42006751894950867, "epoch": 4.63, "learning_rate": 2.6851225697379545e-05, "loss": 0.577, "step": 5477, "task_loss": 0.9024275541305542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5457375645637512, "epoch": 4.63, "learning_rate": 2.6846999154691465e-05, "loss": 0.6219, "step": 5478, "task_loss": 0.8965764045715332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.602469265460968, "epoch": 4.63, "learning_rate": 2.684277261200338e-05, "loss": 0.538, "step": 5479, "task_loss": 0.6006090641021729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8495910167694092, "epoch": 4.63, "learning_rate": 2.68385460693153e-05, "loss": 0.7306, "step": 5480, "task_loss": 0.9474236965179443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3784373998641968, "epoch": 4.63, "learning_rate": 2.683431952662722e-05, "loss": 0.4833, "step": 5481, "task_loss": 0.42260465025901794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42259857058525085, "epoch": 4.63, "learning_rate": 2.6830092983939137e-05, "loss": 0.359, "step": 5482, "task_loss": 0.29643863439559937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2174834907054901, "epoch": 4.63, "learning_rate": 2.6825866441251057e-05, "loss": 0.4274, "step": 5483, "task_loss": 1.445667028427124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3452601730823517, "epoch": 4.64, "learning_rate": 2.6821639898562977e-05, "loss": 0.4276, "step": 5484, "task_loss": 0.5791153907775879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5632915496826172, "epoch": 4.64, "learning_rate": 2.6817413355874893e-05, "loss": 0.566, "step": 5485, "task_loss": 0.7125264406204224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6186407208442688, "epoch": 4.64, "learning_rate": 2.6813186813186813e-05, "loss": 0.4919, "step": 5486, "task_loss": 0.7539494037628174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6099252700805664, "epoch": 4.64, "learning_rate": 2.6808960270498733e-05, "loss": 0.5985, "step": 5487, "task_loss": 0.6449882388114929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2927268445491791, "epoch": 4.64, "learning_rate": 2.6804733727810656e-05, "loss": 0.4786, "step": 5488, "task_loss": 0.7286347150802612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.480973482131958, "epoch": 4.64, "learning_rate": 2.680050718512257e-05, "loss": 0.4913, "step": 5489, "task_loss": 0.49751967191696167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6632481813430786, "epoch": 4.64, "learning_rate": 2.6796280642434492e-05, "loss": 0.4439, "step": 5490, "task_loss": 1.1208927631378174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4094443917274475, "epoch": 4.64, "learning_rate": 2.679205409974641e-05, "loss": 0.5099, "step": 5491, "task_loss": 0.5087623000144958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5138392448425293, "epoch": 4.64, "learning_rate": 2.6787827557058324e-05, "loss": 0.5412, "step": 5492, "task_loss": 0.5152201056480408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.552590012550354, "epoch": 4.64, "learning_rate": 2.6783601014370248e-05, "loss": 0.5042, "step": 5493, "task_loss": 0.4980115294456482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.587933361530304, "epoch": 4.64, "learning_rate": 2.6779374471682167e-05, "loss": 0.5931, "step": 5494, "task_loss": 0.7215527296066284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.607820987701416, "epoch": 4.64, "learning_rate": 2.6775147928994084e-05, "loss": 0.5731, "step": 5495, "task_loss": 0.9025735259056091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3524215519428253, "epoch": 4.65, "learning_rate": 2.6770921386306003e-05, "loss": 0.4171, "step": 5496, "task_loss": 1.23963463306427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3597066402435303, "epoch": 4.65, "learning_rate": 2.6766694843617923e-05, "loss": 0.4827, "step": 5497, "task_loss": 0.9997002482414246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3568958044052124, "epoch": 4.65, "learning_rate": 2.676246830092984e-05, "loss": 0.3837, "step": 5498, "task_loss": 1.1404331922531128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3596614599227905, "epoch": 4.65, "learning_rate": 2.675824175824176e-05, "loss": 0.2812, "step": 5499, "task_loss": 0.29628556966781616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43777722120285034, "epoch": 4.65, "learning_rate": 2.675401521555368e-05, "loss": 0.4893, "step": 5500, "task_loss": 0.7023282051086426 }, { "epoch": 4.65, "eval_accuracy": 0.9026138613861386, "eval_loss": 0.3561651408672333, "eval_runtime": 228.1759, "eval_samples_per_second": 110.66, "eval_steps_per_second": 0.868, "step": 5500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4523773789405823, "epoch": 4.65, "learning_rate": 2.67497886728656e-05, "loss": 0.6071, "step": 5501, "task_loss": 0.3969096541404724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7608723044395447, "epoch": 4.65, "learning_rate": 2.6745562130177515e-05, "loss": 0.547, "step": 5502, "task_loss": 0.8551808595657349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31090399622917175, "epoch": 4.65, "learning_rate": 2.6741335587489435e-05, "loss": 0.4405, "step": 5503, "task_loss": 1.0763027667999268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5198166370391846, "epoch": 4.65, "learning_rate": 2.6737109044801355e-05, "loss": 0.5792, "step": 5504, "task_loss": 0.6711432933807373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2607373297214508, "epoch": 4.65, "learning_rate": 2.673288250211327e-05, "loss": 0.5798, "step": 5505, "task_loss": 0.37070998549461365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43698394298553467, "epoch": 4.65, "learning_rate": 2.672865595942519e-05, "loss": 0.3589, "step": 5506, "task_loss": 0.3279370367527008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5569877624511719, "epoch": 4.65, "learning_rate": 2.6724429416737114e-05, "loss": 0.499, "step": 5507, "task_loss": 0.5627281665802002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48648685216903687, "epoch": 4.66, "learning_rate": 2.6720202874049027e-05, "loss": 0.5581, "step": 5508, "task_loss": 1.4164979457855225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5226390361785889, "epoch": 4.66, "learning_rate": 2.6715976331360946e-05, "loss": 0.5408, "step": 5509, "task_loss": 0.7491970658302307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3967892825603485, "epoch": 4.66, "learning_rate": 2.671174978867287e-05, "loss": 0.4699, "step": 5510, "task_loss": 0.6714504361152649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5288428664207458, "epoch": 4.66, "learning_rate": 2.6707523245984783e-05, "loss": 0.5784, "step": 5511, "task_loss": 0.5653752684593201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2977157235145569, "epoch": 4.66, "learning_rate": 2.6703296703296706e-05, "loss": 0.5335, "step": 5512, "task_loss": 0.02223970927298069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6852577328681946, "epoch": 4.66, "learning_rate": 2.6699070160608625e-05, "loss": 0.5361, "step": 5513, "task_loss": 0.4608360826969147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4037739932537079, "epoch": 4.66, "learning_rate": 2.669484361792054e-05, "loss": 0.4604, "step": 5514, "task_loss": 0.48674654960632324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.317607581615448, "epoch": 4.66, "learning_rate": 2.669061707523246e-05, "loss": 0.52, "step": 5515, "task_loss": 0.46464473009109497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27266260981559753, "epoch": 4.66, "learning_rate": 2.668639053254438e-05, "loss": 0.4148, "step": 5516, "task_loss": 0.6932034492492676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4652734398841858, "epoch": 4.66, "learning_rate": 2.66821639898563e-05, "loss": 0.5323, "step": 5517, "task_loss": 0.5104216933250427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7152064442634583, "epoch": 4.66, "learning_rate": 2.6677937447168217e-05, "loss": 0.494, "step": 5518, "task_loss": 0.8032392263412476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6222490072250366, "epoch": 4.66, "learning_rate": 2.6673710904480137e-05, "loss": 0.6388, "step": 5519, "task_loss": 0.26591697335243225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39413371682167053, "epoch": 4.67, "learning_rate": 2.6669484361792057e-05, "loss": 0.4606, "step": 5520, "task_loss": 0.651878297328949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32757729291915894, "epoch": 4.67, "learning_rate": 2.6665257819103973e-05, "loss": 0.3725, "step": 5521, "task_loss": 0.1749047487974167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41070133447647095, "epoch": 4.67, "learning_rate": 2.6661031276415893e-05, "loss": 0.4867, "step": 5522, "task_loss": 0.613288164138794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4388280212879181, "epoch": 4.67, "learning_rate": 2.6656804733727813e-05, "loss": 0.5016, "step": 5523, "task_loss": 0.6644706130027771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8439745903015137, "epoch": 4.67, "learning_rate": 2.665257819103973e-05, "loss": 0.6902, "step": 5524, "task_loss": 0.130574032664299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3907517194747925, "epoch": 4.67, "learning_rate": 2.664835164835165e-05, "loss": 0.5417, "step": 5525, "task_loss": 0.33565089106559753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41575074195861816, "epoch": 4.67, "learning_rate": 2.664412510566357e-05, "loss": 0.5337, "step": 5526, "task_loss": 0.46007034182548523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6557846069335938, "epoch": 4.67, "learning_rate": 2.6639898562975485e-05, "loss": 0.5275, "step": 5527, "task_loss": 1.5052037239074707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5499823689460754, "epoch": 4.67, "learning_rate": 2.6635672020287405e-05, "loss": 0.4302, "step": 5528, "task_loss": 0.8446851968765259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2966265082359314, "epoch": 4.67, "learning_rate": 2.6631445477599328e-05, "loss": 0.5629, "step": 5529, "task_loss": 0.1258809119462967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5064107179641724, "epoch": 4.67, "learning_rate": 2.6627218934911247e-05, "loss": 0.6413, "step": 5530, "task_loss": 0.6399808526039124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5774915218353271, "epoch": 4.67, "learning_rate": 2.662299239222316e-05, "loss": 0.55, "step": 5531, "task_loss": 0.8530387878417969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5741387605667114, "epoch": 4.68, "learning_rate": 2.6618765849535084e-05, "loss": 0.5026, "step": 5532, "task_loss": 0.8792498707771301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5797810554504395, "epoch": 4.68, "learning_rate": 2.6614539306847003e-05, "loss": 0.4534, "step": 5533, "task_loss": 0.06427767872810364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35390692949295044, "epoch": 4.68, "learning_rate": 2.6610312764158916e-05, "loss": 0.4629, "step": 5534, "task_loss": 0.5747318267822266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22252270579338074, "epoch": 4.68, "learning_rate": 2.660608622147084e-05, "loss": 0.5484, "step": 5535, "task_loss": 0.3115277588367462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35737746953964233, "epoch": 4.68, "learning_rate": 2.660185967878276e-05, "loss": 0.4069, "step": 5536, "task_loss": 1.1496926546096802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7334063053131104, "epoch": 4.68, "learning_rate": 2.6597633136094675e-05, "loss": 0.426, "step": 5537, "task_loss": 0.9971699714660645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.433099627494812, "epoch": 4.68, "learning_rate": 2.6593406593406595e-05, "loss": 0.4768, "step": 5538, "task_loss": 0.44010573625564575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35387203097343445, "epoch": 4.68, "learning_rate": 2.6589180050718515e-05, "loss": 0.5987, "step": 5539, "task_loss": 1.3987623453140259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4783155918121338, "epoch": 4.68, "learning_rate": 2.658495350803043e-05, "loss": 0.7451, "step": 5540, "task_loss": 0.46355360746383667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32887375354766846, "epoch": 4.68, "learning_rate": 2.658072696534235e-05, "loss": 0.3279, "step": 5541, "task_loss": 0.37576723098754883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2954345941543579, "epoch": 4.68, "learning_rate": 2.657650042265427e-05, "loss": 0.5132, "step": 5542, "task_loss": 1.1704410314559937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38812386989593506, "epoch": 4.69, "learning_rate": 2.6572273879966187e-05, "loss": 0.5396, "step": 5543, "task_loss": 1.120713233947754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36863604187965393, "epoch": 4.69, "learning_rate": 2.6568047337278107e-05, "loss": 0.5563, "step": 5544, "task_loss": 0.40240323543548584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2079882025718689, "epoch": 4.69, "learning_rate": 2.6563820794590027e-05, "loss": 0.2734, "step": 5545, "task_loss": 0.45972466468811035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5143852233886719, "epoch": 4.69, "learning_rate": 2.655959425190195e-05, "loss": 0.5082, "step": 5546, "task_loss": 0.5054423213005066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3416542410850525, "epoch": 4.69, "learning_rate": 2.6555367709213863e-05, "loss": 0.3293, "step": 5547, "task_loss": 0.4533216953277588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4784584939479828, "epoch": 4.69, "learning_rate": 2.6551141166525782e-05, "loss": 0.646, "step": 5548, "task_loss": 1.083616018295288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29557737708091736, "epoch": 4.69, "learning_rate": 2.6546914623837706e-05, "loss": 0.3529, "step": 5549, "task_loss": 0.8247396945953369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.933720588684082, "epoch": 4.69, "learning_rate": 2.654268808114962e-05, "loss": 0.7319, "step": 5550, "task_loss": 0.4455767273902893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43065202236175537, "epoch": 4.69, "learning_rate": 2.6538461538461538e-05, "loss": 0.4658, "step": 5551, "task_loss": 0.6591951847076416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5519582033157349, "epoch": 4.69, "learning_rate": 2.653423499577346e-05, "loss": 0.4613, "step": 5552, "task_loss": 0.8736343383789062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42683807015419006, "epoch": 4.69, "learning_rate": 2.6530008453085374e-05, "loss": 0.5368, "step": 5553, "task_loss": 1.297966718673706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41083237528800964, "epoch": 4.69, "learning_rate": 2.6525781910397297e-05, "loss": 0.5072, "step": 5554, "task_loss": 0.28636035323143005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5030954480171204, "epoch": 4.7, "learning_rate": 2.6521555367709217e-05, "loss": 0.4919, "step": 5555, "task_loss": 0.4310172200202942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.513940691947937, "epoch": 4.7, "learning_rate": 2.651732882502113e-05, "loss": 0.5688, "step": 5556, "task_loss": 0.609089195728302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4849684536457062, "epoch": 4.7, "learning_rate": 2.6513102282333053e-05, "loss": 0.5868, "step": 5557, "task_loss": 0.34574028849601746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30403852462768555, "epoch": 4.7, "learning_rate": 2.6508875739644973e-05, "loss": 0.5085, "step": 5558, "task_loss": 0.4700937569141388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4608495831489563, "epoch": 4.7, "learning_rate": 2.6504649196956893e-05, "loss": 0.4697, "step": 5559, "task_loss": 0.6089870929718018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38940346240997314, "epoch": 4.7, "learning_rate": 2.650042265426881e-05, "loss": 0.5475, "step": 5560, "task_loss": 0.7848317623138428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37892967462539673, "epoch": 4.7, "learning_rate": 2.649619611158073e-05, "loss": 0.4604, "step": 5561, "task_loss": 0.572037935256958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7236365079879761, "epoch": 4.7, "learning_rate": 2.649196956889265e-05, "loss": 0.6131, "step": 5562, "task_loss": 0.6075588464736938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5459661483764648, "epoch": 4.7, "learning_rate": 2.6487743026204565e-05, "loss": 0.5209, "step": 5563, "task_loss": 0.5323628783226013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5334126353263855, "epoch": 4.7, "learning_rate": 2.6483516483516485e-05, "loss": 0.5249, "step": 5564, "task_loss": 0.1525871753692627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3653854727745056, "epoch": 4.7, "learning_rate": 2.6479289940828404e-05, "loss": 0.5442, "step": 5565, "task_loss": 0.5842914581298828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37355881929397583, "epoch": 4.7, "learning_rate": 2.647506339814032e-05, "loss": 0.6531, "step": 5566, "task_loss": 0.5938406586647034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5712457895278931, "epoch": 4.71, "learning_rate": 2.647083685545224e-05, "loss": 0.4361, "step": 5567, "task_loss": 0.5165548324584961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7099549770355225, "epoch": 4.71, "learning_rate": 2.646661031276416e-05, "loss": 0.6888, "step": 5568, "task_loss": 0.23085536062717438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6301600337028503, "epoch": 4.71, "learning_rate": 2.6462383770076077e-05, "loss": 0.6003, "step": 5569, "task_loss": 0.5380938649177551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4351697564125061, "epoch": 4.71, "learning_rate": 2.6458157227387996e-05, "loss": 0.4229, "step": 5570, "task_loss": 0.7270271182060242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.771167516708374, "epoch": 4.71, "learning_rate": 2.645393068469992e-05, "loss": 0.6589, "step": 5571, "task_loss": 0.7263500094413757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8471298217773438, "epoch": 4.71, "learning_rate": 2.6449704142011832e-05, "loss": 0.6533, "step": 5572, "task_loss": 1.0060794353485107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6487619280815125, "epoch": 4.71, "learning_rate": 2.6445477599323752e-05, "loss": 0.5162, "step": 5573, "task_loss": 0.6963391304016113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6797330975532532, "epoch": 4.71, "learning_rate": 2.6441251056635675e-05, "loss": 0.4536, "step": 5574, "task_loss": 0.780808687210083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5461999177932739, "epoch": 4.71, "learning_rate": 2.6437024513947595e-05, "loss": 0.5778, "step": 5575, "task_loss": 1.1536657810211182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5113688707351685, "epoch": 4.71, "learning_rate": 2.643279797125951e-05, "loss": 0.5099, "step": 5576, "task_loss": 0.2360696941614151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4419170320034027, "epoch": 4.71, "learning_rate": 2.642857142857143e-05, "loss": 0.5226, "step": 5577, "task_loss": 0.824263870716095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5355842709541321, "epoch": 4.71, "learning_rate": 2.642434488588335e-05, "loss": 0.5348, "step": 5578, "task_loss": 0.70827716588974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8563870787620544, "epoch": 4.72, "learning_rate": 2.6420118343195267e-05, "loss": 0.6513, "step": 5579, "task_loss": 1.5571237802505493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36583828926086426, "epoch": 4.72, "learning_rate": 2.6415891800507187e-05, "loss": 0.5045, "step": 5580, "task_loss": 0.10022090375423431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2370641678571701, "epoch": 4.72, "learning_rate": 2.6411665257819107e-05, "loss": 0.5292, "step": 5581, "task_loss": 0.14611811935901642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3379738926887512, "epoch": 4.72, "learning_rate": 2.6407438715131023e-05, "loss": 0.4644, "step": 5582, "task_loss": 0.4610328674316406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7449734807014465, "epoch": 4.72, "learning_rate": 2.6403212172442943e-05, "loss": 0.7266, "step": 5583, "task_loss": 0.6059086918830872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.518571674823761, "epoch": 4.72, "learning_rate": 2.6398985629754862e-05, "loss": 0.4909, "step": 5584, "task_loss": 1.381953239440918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.572045087814331, "epoch": 4.72, "learning_rate": 2.639475908706678e-05, "loss": 0.4748, "step": 5585, "task_loss": 1.100764274597168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49839168787002563, "epoch": 4.72, "learning_rate": 2.63905325443787e-05, "loss": 0.5934, "step": 5586, "task_loss": 0.5220285058021545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3297421932220459, "epoch": 4.72, "learning_rate": 2.638630600169062e-05, "loss": 0.5343, "step": 5587, "task_loss": 0.38556134700775146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45048609375953674, "epoch": 4.72, "learning_rate": 2.6382079459002535e-05, "loss": 0.5049, "step": 5588, "task_loss": 1.2399929761886597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5687627196311951, "epoch": 4.72, "learning_rate": 2.6377852916314454e-05, "loss": 0.5119, "step": 5589, "task_loss": 0.4569653272628784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3494449853897095, "epoch": 4.72, "learning_rate": 2.6373626373626374e-05, "loss": 0.3949, "step": 5590, "task_loss": 0.20925568044185638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5153350234031677, "epoch": 4.73, "learning_rate": 2.6369399830938297e-05, "loss": 0.5013, "step": 5591, "task_loss": 1.0863345861434937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38383543491363525, "epoch": 4.73, "learning_rate": 2.636517328825021e-05, "loss": 0.4107, "step": 5592, "task_loss": 0.7033401727676392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5520209670066833, "epoch": 4.73, "learning_rate": 2.6360946745562133e-05, "loss": 0.4617, "step": 5593, "task_loss": 1.207166314125061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5075170993804932, "epoch": 4.73, "learning_rate": 2.6356720202874053e-05, "loss": 0.6318, "step": 5594, "task_loss": 0.9788430333137512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41028743982315063, "epoch": 4.73, "learning_rate": 2.6352493660185966e-05, "loss": 0.5641, "step": 5595, "task_loss": 0.4345182478427887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4060463309288025, "epoch": 4.73, "learning_rate": 2.634826711749789e-05, "loss": 0.5916, "step": 5596, "task_loss": 0.47377336025238037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5743918418884277, "epoch": 4.73, "learning_rate": 2.634404057480981e-05, "loss": 0.551, "step": 5597, "task_loss": 1.3714823722839355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7340099811553955, "epoch": 4.73, "learning_rate": 2.6339814032121722e-05, "loss": 0.5147, "step": 5598, "task_loss": 0.7502601742744446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5614179968833923, "epoch": 4.73, "learning_rate": 2.6335587489433645e-05, "loss": 0.4893, "step": 5599, "task_loss": 0.637708842754364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5504275560379028, "epoch": 4.73, "learning_rate": 2.6331360946745565e-05, "loss": 0.7383, "step": 5600, "task_loss": 0.8209319114685059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37135112285614014, "epoch": 4.73, "learning_rate": 2.632713440405748e-05, "loss": 0.3424, "step": 5601, "task_loss": 0.453311026096344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3217511475086212, "epoch": 4.73, "learning_rate": 2.63229078613694e-05, "loss": 0.4112, "step": 5602, "task_loss": 1.0388603210449219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6180079579353333, "epoch": 4.74, "learning_rate": 2.631868131868132e-05, "loss": 0.5138, "step": 5603, "task_loss": 0.09587042033672333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36335116624832153, "epoch": 4.74, "learning_rate": 2.631445477599324e-05, "loss": 0.4446, "step": 5604, "task_loss": 0.30712229013442993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6665881872177124, "epoch": 4.74, "learning_rate": 2.6310228233305157e-05, "loss": 0.5388, "step": 5605, "task_loss": 1.4466938972473145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5067758560180664, "epoch": 4.74, "learning_rate": 2.6306001690617076e-05, "loss": 0.6159, "step": 5606, "task_loss": 0.5833654999732971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6314814686775208, "epoch": 4.74, "learning_rate": 2.6301775147928996e-05, "loss": 0.606, "step": 5607, "task_loss": 0.12786057591438293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.675163745880127, "epoch": 4.74, "learning_rate": 2.6297548605240912e-05, "loss": 0.5101, "step": 5608, "task_loss": 1.256523609161377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45086413621902466, "epoch": 4.74, "learning_rate": 2.6293322062552832e-05, "loss": 0.6938, "step": 5609, "task_loss": 0.2894653081893921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6532406806945801, "epoch": 4.74, "learning_rate": 2.6289095519864755e-05, "loss": 0.5424, "step": 5610, "task_loss": 1.1959331035614014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5219150185585022, "epoch": 4.74, "learning_rate": 2.628486897717667e-05, "loss": 0.5459, "step": 5611, "task_loss": 0.8497066497802734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7167882919311523, "epoch": 4.74, "learning_rate": 2.6280642434488588e-05, "loss": 0.5655, "step": 5612, "task_loss": 1.190349817276001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4336186349391937, "epoch": 4.74, "learning_rate": 2.627641589180051e-05, "loss": 0.3743, "step": 5613, "task_loss": 0.39047151803970337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5110586285591125, "epoch": 4.75, "learning_rate": 2.6272189349112424e-05, "loss": 0.5845, "step": 5614, "task_loss": 0.6457735896110535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5020008683204651, "epoch": 4.75, "learning_rate": 2.6267962806424344e-05, "loss": 0.5204, "step": 5615, "task_loss": 1.2768497467041016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8477251529693604, "epoch": 4.75, "learning_rate": 2.6263736263736267e-05, "loss": 0.5867, "step": 5616, "task_loss": 1.039192795753479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5431690216064453, "epoch": 4.75, "learning_rate": 2.625950972104818e-05, "loss": 0.625, "step": 5617, "task_loss": 1.0950745344161987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.363947331905365, "epoch": 4.75, "learning_rate": 2.6255283178360103e-05, "loss": 0.409, "step": 5618, "task_loss": 0.39402270317077637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40578022599220276, "epoch": 4.75, "learning_rate": 2.6251056635672023e-05, "loss": 0.6084, "step": 5619, "task_loss": 1.3617318868637085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6474149823188782, "epoch": 4.75, "learning_rate": 2.6246830092983943e-05, "loss": 0.4541, "step": 5620, "task_loss": 0.48807433247566223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5947340130805969, "epoch": 4.75, "learning_rate": 2.624260355029586e-05, "loss": 0.5067, "step": 5621, "task_loss": 0.968641459941864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0875134468078613, "epoch": 4.75, "learning_rate": 2.623837700760778e-05, "loss": 0.6976, "step": 5622, "task_loss": 0.7974452972412109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5867829322814941, "epoch": 4.75, "learning_rate": 2.62341504649197e-05, "loss": 0.4656, "step": 5623, "task_loss": 0.5273624062538147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4523177742958069, "epoch": 4.75, "learning_rate": 2.6229923922231615e-05, "loss": 0.5299, "step": 5624, "task_loss": 0.8055267930030823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37007904052734375, "epoch": 4.75, "learning_rate": 2.6225697379543535e-05, "loss": 0.5727, "step": 5625, "task_loss": 0.7688056230545044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6906920671463013, "epoch": 4.76, "learning_rate": 2.6221470836855454e-05, "loss": 0.5354, "step": 5626, "task_loss": 1.0719765424728394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42443329095840454, "epoch": 4.76, "learning_rate": 2.621724429416737e-05, "loss": 0.6551, "step": 5627, "task_loss": 0.6896079182624817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4050268530845642, "epoch": 4.76, "learning_rate": 2.621301775147929e-05, "loss": 0.3016, "step": 5628, "task_loss": 0.46010303497314453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.780997097492218, "epoch": 4.76, "learning_rate": 2.620879120879121e-05, "loss": 0.7151, "step": 5629, "task_loss": 0.807028591632843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5751667022705078, "epoch": 4.76, "learning_rate": 2.6204564666103126e-05, "loss": 0.5585, "step": 5630, "task_loss": 0.7727732062339783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6906204223632812, "epoch": 4.76, "learning_rate": 2.6200338123415046e-05, "loss": 0.6195, "step": 5631, "task_loss": 0.48975932598114014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4904405474662781, "epoch": 4.76, "learning_rate": 2.6196111580726966e-05, "loss": 0.5136, "step": 5632, "task_loss": 0.41775432229042053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31652069091796875, "epoch": 4.76, "learning_rate": 2.619188503803889e-05, "loss": 0.352, "step": 5633, "task_loss": 0.2876855432987213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5669068098068237, "epoch": 4.76, "learning_rate": 2.6187658495350802e-05, "loss": 0.5618, "step": 5634, "task_loss": 1.006703495979309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3489764928817749, "epoch": 4.76, "learning_rate": 2.6183431952662725e-05, "loss": 0.4251, "step": 5635, "task_loss": 0.7292515635490417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4997534155845642, "epoch": 4.76, "learning_rate": 2.6179205409974645e-05, "loss": 0.5169, "step": 5636, "task_loss": 0.5023015737533569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9334338307380676, "epoch": 4.76, "learning_rate": 2.6174978867286558e-05, "loss": 0.6109, "step": 5637, "task_loss": 0.6112658381462097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4933055639266968, "epoch": 4.77, "learning_rate": 2.617075232459848e-05, "loss": 0.6851, "step": 5638, "task_loss": 0.6476717591285706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4700254201889038, "epoch": 4.77, "learning_rate": 2.61665257819104e-05, "loss": 0.4386, "step": 5639, "task_loss": 0.898995041847229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6200739741325378, "epoch": 4.77, "learning_rate": 2.6162299239222317e-05, "loss": 0.5285, "step": 5640, "task_loss": 1.8384853601455688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.626653254032135, "epoch": 4.77, "learning_rate": 2.6158072696534237e-05, "loss": 0.6014, "step": 5641, "task_loss": 0.7722612023353577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20563089847564697, "epoch": 4.77, "learning_rate": 2.6153846153846157e-05, "loss": 0.5374, "step": 5642, "task_loss": 0.4498206377029419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7185769081115723, "epoch": 4.77, "learning_rate": 2.6149619611158073e-05, "loss": 0.602, "step": 5643, "task_loss": 0.7585304975509644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2892688512802124, "epoch": 4.77, "learning_rate": 2.6145393068469993e-05, "loss": 0.5533, "step": 5644, "task_loss": 0.6774147748947144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5541114807128906, "epoch": 4.77, "learning_rate": 2.6141166525781912e-05, "loss": 0.4698, "step": 5645, "task_loss": 1.1297212839126587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28559190034866333, "epoch": 4.77, "learning_rate": 2.613693998309383e-05, "loss": 0.4665, "step": 5646, "task_loss": 0.3137655556201935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36219853162765503, "epoch": 4.77, "learning_rate": 2.613271344040575e-05, "loss": 0.6662, "step": 5647, "task_loss": 0.839945912361145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5286502242088318, "epoch": 4.77, "learning_rate": 2.6128486897717668e-05, "loss": 0.5826, "step": 5648, "task_loss": 0.5320906043052673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36559855937957764, "epoch": 4.77, "learning_rate": 2.6124260355029588e-05, "loss": 0.5939, "step": 5649, "task_loss": 0.5208147764205933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40268534421920776, "epoch": 4.78, "learning_rate": 2.6120033812341504e-05, "loss": 0.485, "step": 5650, "task_loss": 0.2845090627670288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2513153553009033, "epoch": 4.78, "learning_rate": 2.6115807269653424e-05, "loss": 0.443, "step": 5651, "task_loss": 0.1188097819685936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23720800876617432, "epoch": 4.78, "learning_rate": 2.6111580726965347e-05, "loss": 0.397, "step": 5652, "task_loss": 1.0551912784576416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2911929488182068, "epoch": 4.78, "learning_rate": 2.610735418427726e-05, "loss": 0.4006, "step": 5653, "task_loss": 0.6315035223960876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43509918451309204, "epoch": 4.78, "learning_rate": 2.610312764158918e-05, "loss": 0.4368, "step": 5654, "task_loss": 0.22782652080059052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7625494003295898, "epoch": 4.78, "learning_rate": 2.6098901098901103e-05, "loss": 0.6781, "step": 5655, "task_loss": 1.1475688219070435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3548119068145752, "epoch": 4.78, "learning_rate": 2.6094674556213016e-05, "loss": 0.5091, "step": 5656, "task_loss": 1.0246869325637817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4572964012622833, "epoch": 4.78, "learning_rate": 2.609044801352494e-05, "loss": 0.6032, "step": 5657, "task_loss": 0.47745344042778015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6380445957183838, "epoch": 4.78, "learning_rate": 2.608622147083686e-05, "loss": 0.6741, "step": 5658, "task_loss": 1.09572434425354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8480476140975952, "epoch": 4.78, "learning_rate": 2.6081994928148772e-05, "loss": 0.5767, "step": 5659, "task_loss": 0.14236420392990112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5163456201553345, "epoch": 4.78, "learning_rate": 2.6077768385460695e-05, "loss": 0.5335, "step": 5660, "task_loss": 0.6128365993499756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2095562219619751, "epoch": 4.78, "learning_rate": 2.6073541842772615e-05, "loss": 0.5052, "step": 5661, "task_loss": 0.4599507451057434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5721461772918701, "epoch": 4.79, "learning_rate": 2.6069315300084534e-05, "loss": 0.5359, "step": 5662, "task_loss": 0.14122001826763153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.548396110534668, "epoch": 4.79, "learning_rate": 2.606508875739645e-05, "loss": 0.4802, "step": 5663, "task_loss": 0.3679597079753876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6143293976783752, "epoch": 4.79, "learning_rate": 2.606086221470837e-05, "loss": 0.5078, "step": 5664, "task_loss": 0.9742221832275391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3170853555202484, "epoch": 4.79, "learning_rate": 2.605663567202029e-05, "loss": 0.4282, "step": 5665, "task_loss": 0.6404563188552856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7089065909385681, "epoch": 4.79, "learning_rate": 2.6052409129332207e-05, "loss": 0.6001, "step": 5666, "task_loss": 0.19910073280334473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3767755329608917, "epoch": 4.79, "learning_rate": 2.6048182586644126e-05, "loss": 0.4552, "step": 5667, "task_loss": 0.6200957298278809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43517014384269714, "epoch": 4.79, "learning_rate": 2.6043956043956046e-05, "loss": 0.4715, "step": 5668, "task_loss": 0.3498627543449402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45215046405792236, "epoch": 4.79, "learning_rate": 2.6039729501267962e-05, "loss": 0.4349, "step": 5669, "task_loss": 0.1911943554878235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.287114679813385, "epoch": 4.79, "learning_rate": 2.6035502958579882e-05, "loss": 0.5109, "step": 5670, "task_loss": 0.7816593050956726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.960198163986206, "epoch": 4.79, "learning_rate": 2.6031276415891802e-05, "loss": 0.844, "step": 5671, "task_loss": 1.0055745840072632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.74333655834198, "epoch": 4.79, "learning_rate": 2.6027049873203718e-05, "loss": 0.5266, "step": 5672, "task_loss": 0.44717124104499817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37837931513786316, "epoch": 4.79, "learning_rate": 2.6022823330515638e-05, "loss": 0.5604, "step": 5673, "task_loss": 0.9327839016914368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35657021403312683, "epoch": 4.8, "learning_rate": 2.601859678782756e-05, "loss": 0.6562, "step": 5674, "task_loss": 0.4927375912666321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44908827543258667, "epoch": 4.8, "learning_rate": 2.6014370245139474e-05, "loss": 0.5025, "step": 5675, "task_loss": 0.5313500165939331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24783718585968018, "epoch": 4.8, "learning_rate": 2.6010143702451394e-05, "loss": 0.4692, "step": 5676, "task_loss": 0.2493368536233902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6368770003318787, "epoch": 4.8, "learning_rate": 2.6005917159763317e-05, "loss": 0.4334, "step": 5677, "task_loss": 0.7560250759124756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.587492823600769, "epoch": 4.8, "learning_rate": 2.6001690617075237e-05, "loss": 0.6201, "step": 5678, "task_loss": 0.7745904326438904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48275214433670044, "epoch": 4.8, "learning_rate": 2.599746407438715e-05, "loss": 0.4779, "step": 5679, "task_loss": 1.784326434135437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.526221513748169, "epoch": 4.8, "learning_rate": 2.5993237531699073e-05, "loss": 0.5545, "step": 5680, "task_loss": 0.9344443082809448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.525942325592041, "epoch": 4.8, "learning_rate": 2.5989010989010992e-05, "loss": 0.5906, "step": 5681, "task_loss": 0.7862739562988281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5827715396881104, "epoch": 4.8, "learning_rate": 2.598478444632291e-05, "loss": 0.6131, "step": 5682, "task_loss": 0.4872420132160187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24403181672096252, "epoch": 4.8, "learning_rate": 2.598055790363483e-05, "loss": 0.41, "step": 5683, "task_loss": 0.06736335903406143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.913641631603241, "epoch": 4.8, "learning_rate": 2.5976331360946748e-05, "loss": 0.6992, "step": 5684, "task_loss": 0.8783570528030396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30338025093078613, "epoch": 4.81, "learning_rate": 2.5972104818258665e-05, "loss": 0.4368, "step": 5685, "task_loss": 0.7445864677429199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6875220537185669, "epoch": 4.81, "learning_rate": 2.5967878275570584e-05, "loss": 0.5046, "step": 5686, "task_loss": 1.198779582977295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3079240322113037, "epoch": 4.81, "learning_rate": 2.5963651732882504e-05, "loss": 0.4434, "step": 5687, "task_loss": 0.24542582035064697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5229561924934387, "epoch": 4.81, "learning_rate": 2.595942519019442e-05, "loss": 0.5732, "step": 5688, "task_loss": 0.464175820350647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7783998250961304, "epoch": 4.81, "learning_rate": 2.595519864750634e-05, "loss": 0.5074, "step": 5689, "task_loss": 0.7924084663391113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3210090398788452, "epoch": 4.81, "learning_rate": 2.595097210481826e-05, "loss": 0.3673, "step": 5690, "task_loss": 0.5120891332626343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4437573552131653, "epoch": 4.81, "learning_rate": 2.5946745562130183e-05, "loss": 0.4692, "step": 5691, "task_loss": 1.708738088607788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.434333860874176, "epoch": 4.81, "learning_rate": 2.5942519019442096e-05, "loss": 0.4943, "step": 5692, "task_loss": 1.3493313789367676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8483754396438599, "epoch": 4.81, "learning_rate": 2.5938292476754016e-05, "loss": 0.7299, "step": 5693, "task_loss": 1.4283978939056396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5169681310653687, "epoch": 4.81, "learning_rate": 2.593406593406594e-05, "loss": 0.575, "step": 5694, "task_loss": 0.705615222454071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.646710991859436, "epoch": 4.81, "learning_rate": 2.5929839391377852e-05, "loss": 0.5696, "step": 5695, "task_loss": 0.9948090314865112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6141926050186157, "epoch": 4.81, "learning_rate": 2.592561284868977e-05, "loss": 0.5006, "step": 5696, "task_loss": 0.6821855306625366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7578493356704712, "epoch": 4.82, "learning_rate": 2.5921386306001695e-05, "loss": 0.5585, "step": 5697, "task_loss": 0.8133834600448608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30476129055023193, "epoch": 4.82, "learning_rate": 2.5917159763313608e-05, "loss": 0.4544, "step": 5698, "task_loss": 1.0011470317840576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24370303750038147, "epoch": 4.82, "learning_rate": 2.591293322062553e-05, "loss": 0.5116, "step": 5699, "task_loss": 0.9572376608848572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40835651755332947, "epoch": 4.82, "learning_rate": 2.590870667793745e-05, "loss": 0.3912, "step": 5700, "task_loss": 0.5730369687080383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5113409757614136, "epoch": 4.82, "learning_rate": 2.5904480135249363e-05, "loss": 0.4246, "step": 5701, "task_loss": 0.2606313228607178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4817047715187073, "epoch": 4.82, "learning_rate": 2.5900253592561287e-05, "loss": 0.5348, "step": 5702, "task_loss": 0.3780542016029358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4873103201389313, "epoch": 4.82, "learning_rate": 2.5896027049873206e-05, "loss": 0.4629, "step": 5703, "task_loss": 0.9705052971839905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0966441631317139, "epoch": 4.82, "learning_rate": 2.5891800507185123e-05, "loss": 0.5141, "step": 5704, "task_loss": 0.5987086892127991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7557412981987, "epoch": 4.82, "learning_rate": 2.5887573964497042e-05, "loss": 0.4665, "step": 5705, "task_loss": 0.24284757673740387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5400998592376709, "epoch": 4.82, "learning_rate": 2.5883347421808962e-05, "loss": 0.607, "step": 5706, "task_loss": 0.4309907555580139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5115411877632141, "epoch": 4.82, "learning_rate": 2.5879120879120882e-05, "loss": 0.771, "step": 5707, "task_loss": 0.6689947843551636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5601620674133301, "epoch": 4.82, "learning_rate": 2.5874894336432798e-05, "loss": 0.5574, "step": 5708, "task_loss": 0.7265559434890747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7620468735694885, "epoch": 4.83, "learning_rate": 2.5870667793744718e-05, "loss": 0.637, "step": 5709, "task_loss": 0.2640365660190582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42057105898857117, "epoch": 4.83, "learning_rate": 2.5866441251056638e-05, "loss": 0.5882, "step": 5710, "task_loss": 0.6982191205024719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6694300174713135, "epoch": 4.83, "learning_rate": 2.5862214708368554e-05, "loss": 0.5296, "step": 5711, "task_loss": 0.5387284159660339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7289752960205078, "epoch": 4.83, "learning_rate": 2.5857988165680474e-05, "loss": 0.5318, "step": 5712, "task_loss": 0.16338476538658142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5404638051986694, "epoch": 4.83, "learning_rate": 2.5853761622992394e-05, "loss": 0.6265, "step": 5713, "task_loss": 0.7374786138534546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5856848955154419, "epoch": 4.83, "learning_rate": 2.584953508030431e-05, "loss": 0.4605, "step": 5714, "task_loss": 0.33530861139297485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8572466373443604, "epoch": 4.83, "learning_rate": 2.584530853761623e-05, "loss": 0.5386, "step": 5715, "task_loss": 0.5358402729034424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4709377884864807, "epoch": 4.83, "learning_rate": 2.5841081994928153e-05, "loss": 0.5637, "step": 5716, "task_loss": 0.35859978199005127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40097755193710327, "epoch": 4.83, "learning_rate": 2.5836855452240066e-05, "loss": 0.6534, "step": 5717, "task_loss": 0.48819518089294434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15693235397338867, "epoch": 4.83, "learning_rate": 2.5832628909551985e-05, "loss": 0.5329, "step": 5718, "task_loss": 0.3870006203651428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.611301600933075, "epoch": 4.83, "learning_rate": 2.582840236686391e-05, "loss": 0.5204, "step": 5719, "task_loss": 0.3511592149734497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38320550322532654, "epoch": 4.83, "learning_rate": 2.582417582417583e-05, "loss": 0.4812, "step": 5720, "task_loss": 0.5833104848861694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4239521622657776, "epoch": 4.84, "learning_rate": 2.5819949281487745e-05, "loss": 0.4865, "step": 5721, "task_loss": 1.3155471086502075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.374227374792099, "epoch": 4.84, "learning_rate": 2.5815722738799664e-05, "loss": 0.4848, "step": 5722, "task_loss": 0.155734583735466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3500721752643585, "epoch": 4.84, "learning_rate": 2.5811496196111584e-05, "loss": 0.3175, "step": 5723, "task_loss": 0.07309938222169876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6808044910430908, "epoch": 4.84, "learning_rate": 2.58072696534235e-05, "loss": 0.607, "step": 5724, "task_loss": 1.301774024963379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5031784772872925, "epoch": 4.84, "learning_rate": 2.580304311073542e-05, "loss": 0.5237, "step": 5725, "task_loss": 0.9276639819145203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5277193188667297, "epoch": 4.84, "learning_rate": 2.579881656804734e-05, "loss": 0.5116, "step": 5726, "task_loss": 0.7294089198112488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48168325424194336, "epoch": 4.84, "learning_rate": 2.5794590025359256e-05, "loss": 0.5225, "step": 5727, "task_loss": 0.8994878530502319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3028010129928589, "epoch": 4.84, "learning_rate": 2.5790363482671176e-05, "loss": 0.5108, "step": 5728, "task_loss": 0.41921886801719666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5681043863296509, "epoch": 4.84, "learning_rate": 2.5786136939983096e-05, "loss": 0.4543, "step": 5729, "task_loss": 0.4027162194252014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5804601907730103, "epoch": 4.84, "learning_rate": 2.5781910397295012e-05, "loss": 0.5915, "step": 5730, "task_loss": 0.2755051553249359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45864808559417725, "epoch": 4.84, "learning_rate": 2.5777683854606932e-05, "loss": 0.5924, "step": 5731, "task_loss": 0.6574577689170837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5979812145233154, "epoch": 4.84, "learning_rate": 2.577345731191885e-05, "loss": 0.5915, "step": 5732, "task_loss": 0.8693372011184692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5351882576942444, "epoch": 4.85, "learning_rate": 2.5769230769230768e-05, "loss": 0.6481, "step": 5733, "task_loss": 1.0330792665481567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38662809133529663, "epoch": 4.85, "learning_rate": 2.5765004226542688e-05, "loss": 0.3976, "step": 5734, "task_loss": 0.8479583859443665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4904678165912628, "epoch": 4.85, "learning_rate": 2.5760777683854607e-05, "loss": 0.5935, "step": 5735, "task_loss": 1.1915929317474365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18958629667758942, "epoch": 4.85, "learning_rate": 2.575655114116653e-05, "loss": 0.5473, "step": 5736, "task_loss": 0.178871750831604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3452599048614502, "epoch": 4.85, "learning_rate": 2.5752324598478444e-05, "loss": 0.3807, "step": 5737, "task_loss": 0.15823009610176086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6254770755767822, "epoch": 4.85, "learning_rate": 2.5748098055790367e-05, "loss": 0.5697, "step": 5738, "task_loss": 0.8473315238952637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45445820689201355, "epoch": 4.85, "learning_rate": 2.5743871513102286e-05, "loss": 0.4999, "step": 5739, "task_loss": 0.42688536643981934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.457449734210968, "epoch": 4.85, "learning_rate": 2.57396449704142e-05, "loss": 0.5356, "step": 5740, "task_loss": 0.4375111758708954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7330834865570068, "epoch": 4.85, "learning_rate": 2.5735418427726123e-05, "loss": 0.5127, "step": 5741, "task_loss": 0.7580153942108154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44900187849998474, "epoch": 4.85, "learning_rate": 2.5731191885038042e-05, "loss": 0.4143, "step": 5742, "task_loss": 0.7130560278892517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4757002890110016, "epoch": 4.85, "learning_rate": 2.5726965342349955e-05, "loss": 0.5065, "step": 5743, "task_loss": 0.08617472648620605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8550693392753601, "epoch": 4.85, "learning_rate": 2.572273879966188e-05, "loss": 0.5284, "step": 5744, "task_loss": 0.9987162947654724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36152395606040955, "epoch": 4.86, "learning_rate": 2.5718512256973798e-05, "loss": 0.4245, "step": 5745, "task_loss": 0.45784759521484375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6456335783004761, "epoch": 4.86, "learning_rate": 2.5714285714285714e-05, "loss": 0.5568, "step": 5746, "task_loss": 1.1442067623138428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35833001136779785, "epoch": 4.86, "learning_rate": 2.5710059171597634e-05, "loss": 0.5218, "step": 5747, "task_loss": 0.6463460326194763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4828640818595886, "epoch": 4.86, "learning_rate": 2.5705832628909554e-05, "loss": 0.4198, "step": 5748, "task_loss": 1.336163878440857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5381168723106384, "epoch": 4.86, "learning_rate": 2.5701606086221474e-05, "loss": 0.483, "step": 5749, "task_loss": 1.245650291442871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6171397566795349, "epoch": 4.86, "learning_rate": 2.569737954353339e-05, "loss": 0.5318, "step": 5750, "task_loss": 0.8073712587356567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7495328187942505, "epoch": 4.86, "learning_rate": 2.569315300084531e-05, "loss": 0.6927, "step": 5751, "task_loss": 0.7202289700508118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3827773332595825, "epoch": 4.86, "learning_rate": 2.568892645815723e-05, "loss": 0.6827, "step": 5752, "task_loss": 1.589543104171753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7276687622070312, "epoch": 4.86, "learning_rate": 2.5684699915469146e-05, "loss": 0.5689, "step": 5753, "task_loss": 0.42279499769210815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.355838418006897, "epoch": 4.86, "learning_rate": 2.5680473372781066e-05, "loss": 0.4216, "step": 5754, "task_loss": 0.6108952760696411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5403056144714355, "epoch": 4.86, "learning_rate": 2.567624683009299e-05, "loss": 0.5262, "step": 5755, "task_loss": 1.629965901374817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4730292558670044, "epoch": 4.87, "learning_rate": 2.56720202874049e-05, "loss": 0.4881, "step": 5756, "task_loss": 1.3579230308532715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4675934910774231, "epoch": 4.87, "learning_rate": 2.566779374471682e-05, "loss": 0.3796, "step": 5757, "task_loss": 0.23253118991851807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2809135913848877, "epoch": 4.87, "learning_rate": 2.5663567202028745e-05, "loss": 0.4844, "step": 5758, "task_loss": 0.7059835195541382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6576655507087708, "epoch": 4.87, "learning_rate": 2.5659340659340658e-05, "loss": 0.5792, "step": 5759, "task_loss": 1.1060556173324585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44497209787368774, "epoch": 4.87, "learning_rate": 2.5655114116652577e-05, "loss": 0.5469, "step": 5760, "task_loss": 0.20644617080688477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31369468569755554, "epoch": 4.87, "learning_rate": 2.56508875739645e-05, "loss": 0.4361, "step": 5761, "task_loss": 0.09243584424257278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24879266321659088, "epoch": 4.87, "learning_rate": 2.5646661031276413e-05, "loss": 0.3747, "step": 5762, "task_loss": 0.1869252771139145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49228155612945557, "epoch": 4.87, "learning_rate": 2.5642434488588336e-05, "loss": 0.4204, "step": 5763, "task_loss": 0.16491226851940155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6650729179382324, "epoch": 4.87, "learning_rate": 2.5638207945900256e-05, "loss": 0.544, "step": 5764, "task_loss": 0.8105949759483337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6996420621871948, "epoch": 4.87, "learning_rate": 2.5633981403212176e-05, "loss": 0.4555, "step": 5765, "task_loss": 0.4945085942745209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8733053207397461, "epoch": 4.87, "learning_rate": 2.5629754860524092e-05, "loss": 0.5221, "step": 5766, "task_loss": 1.4503761529922485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5592821836471558, "epoch": 4.87, "learning_rate": 2.5625528317836012e-05, "loss": 0.5416, "step": 5767, "task_loss": 0.9621143937110901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5700168013572693, "epoch": 4.88, "learning_rate": 2.5621301775147932e-05, "loss": 0.4748, "step": 5768, "task_loss": 1.0551432371139526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4817230701446533, "epoch": 4.88, "learning_rate": 2.5617075232459848e-05, "loss": 0.5339, "step": 5769, "task_loss": 0.27261602878570557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.353694885969162, "epoch": 4.88, "learning_rate": 2.5612848689771768e-05, "loss": 0.4851, "step": 5770, "task_loss": 0.24526077508926392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2565145492553711, "epoch": 4.88, "learning_rate": 2.5608622147083688e-05, "loss": 0.5508, "step": 5771, "task_loss": 0.7748639583587646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4367571771144867, "epoch": 4.88, "learning_rate": 2.5604395604395604e-05, "loss": 0.5058, "step": 5772, "task_loss": 1.2265058755874634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6340959072113037, "epoch": 4.88, "learning_rate": 2.5600169061707524e-05, "loss": 0.4916, "step": 5773, "task_loss": 0.6340881586074829 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37358057498931885, "epoch": 4.88, "learning_rate": 2.5595942519019443e-05, "loss": 0.4644, "step": 5774, "task_loss": 0.43144452571868896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23732715845108032, "epoch": 4.88, "learning_rate": 2.559171597633136e-05, "loss": 0.3145, "step": 5775, "task_loss": 0.34335991740226746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3814034163951874, "epoch": 4.88, "learning_rate": 2.558748943364328e-05, "loss": 0.4416, "step": 5776, "task_loss": 0.9832383990287781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.450017511844635, "epoch": 4.88, "learning_rate": 2.55832628909552e-05, "loss": 0.5042, "step": 5777, "task_loss": 0.6779274940490723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7310442924499512, "epoch": 4.88, "learning_rate": 2.5579036348267122e-05, "loss": 0.5786, "step": 5778, "task_loss": 1.2296597957611084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30241549015045166, "epoch": 4.88, "learning_rate": 2.5574809805579035e-05, "loss": 0.3852, "step": 5779, "task_loss": 0.8809140920639038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6059231758117676, "epoch": 4.89, "learning_rate": 2.557058326289096e-05, "loss": 0.4327, "step": 5780, "task_loss": 1.1398409605026245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6389366388320923, "epoch": 4.89, "learning_rate": 2.5566356720202878e-05, "loss": 0.5166, "step": 5781, "task_loss": 1.318400502204895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5366359949111938, "epoch": 4.89, "learning_rate": 2.556213017751479e-05, "loss": 0.6158, "step": 5782, "task_loss": 0.4537641704082489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4648832082748413, "epoch": 4.89, "learning_rate": 2.5557903634826714e-05, "loss": 0.5396, "step": 5783, "task_loss": 0.905745804309845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4635361433029175, "epoch": 4.89, "learning_rate": 2.5553677092138634e-05, "loss": 0.3974, "step": 5784, "task_loss": 1.1852481365203857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5991666913032532, "epoch": 4.89, "learning_rate": 2.554945054945055e-05, "loss": 0.4264, "step": 5785, "task_loss": 1.1749745607376099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7029132843017578, "epoch": 4.89, "learning_rate": 2.554522400676247e-05, "loss": 0.5258, "step": 5786, "task_loss": 0.6575952768325806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.2517204284667969, "epoch": 4.89, "learning_rate": 2.554099746407439e-05, "loss": 0.6655, "step": 5787, "task_loss": 0.7345222234725952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39583903551101685, "epoch": 4.89, "learning_rate": 2.5536770921386306e-05, "loss": 0.6035, "step": 5788, "task_loss": 0.8615870475769043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5294861793518066, "epoch": 4.89, "learning_rate": 2.5532544378698226e-05, "loss": 0.3969, "step": 5789, "task_loss": 0.543440043926239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49099695682525635, "epoch": 4.89, "learning_rate": 2.5528317836010146e-05, "loss": 0.5556, "step": 5790, "task_loss": 0.574594259262085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35489338636398315, "epoch": 4.89, "learning_rate": 2.5524091293322062e-05, "loss": 0.4158, "step": 5791, "task_loss": 0.5920946002006531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36102455854415894, "epoch": 4.9, "learning_rate": 2.5519864750633982e-05, "loss": 0.4099, "step": 5792, "task_loss": 0.1813107430934906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3581339120864868, "epoch": 4.9, "learning_rate": 2.55156382079459e-05, "loss": 0.6384, "step": 5793, "task_loss": 1.0016592741012573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25984156131744385, "epoch": 4.9, "learning_rate": 2.551141166525782e-05, "loss": 0.388, "step": 5794, "task_loss": 0.9038347005844116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5782009959220886, "epoch": 4.9, "learning_rate": 2.5507185122569738e-05, "loss": 0.5071, "step": 5795, "task_loss": 0.4652150571346283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4317368268966675, "epoch": 4.9, "learning_rate": 2.5502958579881657e-05, "loss": 0.4976, "step": 5796, "task_loss": 0.41916972398757935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9402254819869995, "epoch": 4.9, "learning_rate": 2.549873203719358e-05, "loss": 0.6321, "step": 5797, "task_loss": 0.696761965751648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3938978910446167, "epoch": 4.9, "learning_rate": 2.5494505494505493e-05, "loss": 0.5678, "step": 5798, "task_loss": 0.7342822551727295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4212161600589752, "epoch": 4.9, "learning_rate": 2.5490278951817413e-05, "loss": 0.5359, "step": 5799, "task_loss": 0.6637683510780334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4594886302947998, "epoch": 4.9, "learning_rate": 2.5486052409129336e-05, "loss": 0.5406, "step": 5800, "task_loss": 0.6883007884025574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5758783221244812, "epoch": 4.9, "learning_rate": 2.548182586644125e-05, "loss": 0.4502, "step": 5801, "task_loss": 1.2055011987686157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.861035943031311, "epoch": 4.9, "learning_rate": 2.5477599323753172e-05, "loss": 0.5318, "step": 5802, "task_loss": 0.8397704362869263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4103643596172333, "epoch": 4.9, "learning_rate": 2.5473372781065092e-05, "loss": 0.47, "step": 5803, "task_loss": 0.5728910565376282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4958900809288025, "epoch": 4.91, "learning_rate": 2.5469146238377005e-05, "loss": 0.6223, "step": 5804, "task_loss": 0.928153395652771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7503769993782043, "epoch": 4.91, "learning_rate": 2.5464919695688928e-05, "loss": 0.5743, "step": 5805, "task_loss": 0.4201495945453644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44930627942085266, "epoch": 4.91, "learning_rate": 2.5460693153000848e-05, "loss": 0.4695, "step": 5806, "task_loss": 0.40101706981658936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5396455526351929, "epoch": 4.91, "learning_rate": 2.5456466610312768e-05, "loss": 0.4015, "step": 5807, "task_loss": 0.7356283664703369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47926023602485657, "epoch": 4.91, "learning_rate": 2.5452240067624684e-05, "loss": 0.665, "step": 5808, "task_loss": 0.41746148467063904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5205321907997131, "epoch": 4.91, "learning_rate": 2.5448013524936604e-05, "loss": 0.4636, "step": 5809, "task_loss": 0.9346333742141724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33122286200523376, "epoch": 4.91, "learning_rate": 2.5443786982248524e-05, "loss": 0.4602, "step": 5810, "task_loss": 0.8968300223350525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.361167848110199, "epoch": 4.91, "learning_rate": 2.543956043956044e-05, "loss": 0.5609, "step": 5811, "task_loss": 0.6878184080123901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4316611886024475, "epoch": 4.91, "learning_rate": 2.543533389687236e-05, "loss": 0.4646, "step": 5812, "task_loss": 0.28144291043281555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3572077751159668, "epoch": 4.91, "learning_rate": 2.543110735418428e-05, "loss": 0.4409, "step": 5813, "task_loss": 0.7670496106147766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44395148754119873, "epoch": 4.91, "learning_rate": 2.5426880811496196e-05, "loss": 0.5582, "step": 5814, "task_loss": 0.4610663652420044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45230865478515625, "epoch": 4.91, "learning_rate": 2.5422654268808115e-05, "loss": 0.5096, "step": 5815, "task_loss": 0.9117465019226074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43482837080955505, "epoch": 4.92, "learning_rate": 2.5418427726120035e-05, "loss": 0.3758, "step": 5816, "task_loss": 0.8503986597061157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35342341661453247, "epoch": 4.92, "learning_rate": 2.541420118343195e-05, "loss": 0.3736, "step": 5817, "task_loss": 0.7412182092666626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48658478260040283, "epoch": 4.92, "learning_rate": 2.540997464074387e-05, "loss": 0.51, "step": 5818, "task_loss": 0.3380177617073059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40707266330718994, "epoch": 4.92, "learning_rate": 2.5405748098055794e-05, "loss": 0.399, "step": 5819, "task_loss": 0.25088635087013245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4398203492164612, "epoch": 4.92, "learning_rate": 2.5401521555367707e-05, "loss": 0.5561, "step": 5820, "task_loss": 0.499479740858078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5047560930252075, "epoch": 4.92, "learning_rate": 2.5397295012679627e-05, "loss": 0.4849, "step": 5821, "task_loss": 1.1149452924728394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3739008903503418, "epoch": 4.92, "learning_rate": 2.539306846999155e-05, "loss": 0.48, "step": 5822, "task_loss": 0.9758245348930359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7033113241195679, "epoch": 4.92, "learning_rate": 2.538884192730347e-05, "loss": 0.4864, "step": 5823, "task_loss": 0.9139550924301147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.586179256439209, "epoch": 4.92, "learning_rate": 2.5384615384615383e-05, "loss": 0.4724, "step": 5824, "task_loss": 0.630351185798645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5750783681869507, "epoch": 4.92, "learning_rate": 2.5380388841927306e-05, "loss": 0.4485, "step": 5825, "task_loss": 0.2937678396701813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8464914560317993, "epoch": 4.92, "learning_rate": 2.5376162299239226e-05, "loss": 0.5393, "step": 5826, "task_loss": 1.059510588645935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4885863661766052, "epoch": 4.93, "learning_rate": 2.5371935756551142e-05, "loss": 0.589, "step": 5827, "task_loss": 0.4706907868385315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31396862864494324, "epoch": 4.93, "learning_rate": 2.5367709213863062e-05, "loss": 0.4182, "step": 5828, "task_loss": 0.18040288984775543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.496757447719574, "epoch": 4.93, "learning_rate": 2.536348267117498e-05, "loss": 0.5786, "step": 5829, "task_loss": 1.1320637464523315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5502353310585022, "epoch": 4.93, "learning_rate": 2.5359256128486898e-05, "loss": 0.4745, "step": 5830, "task_loss": 0.17524020373821259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6988099217414856, "epoch": 4.93, "learning_rate": 2.5355029585798818e-05, "loss": 0.5978, "step": 5831, "task_loss": 0.525827944278717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48440343141555786, "epoch": 4.93, "learning_rate": 2.5350803043110737e-05, "loss": 0.4035, "step": 5832, "task_loss": 0.9719178676605225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49175530672073364, "epoch": 4.93, "learning_rate": 2.5346576500422654e-05, "loss": 0.4928, "step": 5833, "task_loss": 0.4114224910736084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3087288737297058, "epoch": 4.93, "learning_rate": 2.5342349957734574e-05, "loss": 0.4708, "step": 5834, "task_loss": 0.21083305776119232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5517680644989014, "epoch": 4.93, "learning_rate": 2.5338123415046493e-05, "loss": 0.6133, "step": 5835, "task_loss": 0.46359625458717346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.664401650428772, "epoch": 4.93, "learning_rate": 2.5333896872358413e-05, "loss": 0.5442, "step": 5836, "task_loss": 1.2301784753799438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19935551285743713, "epoch": 4.93, "learning_rate": 2.532967032967033e-05, "loss": 0.4321, "step": 5837, "task_loss": 0.23818977177143097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5411819815635681, "epoch": 4.93, "learning_rate": 2.532544378698225e-05, "loss": 0.5597, "step": 5838, "task_loss": 0.728217601776123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37624186277389526, "epoch": 4.94, "learning_rate": 2.5321217244294172e-05, "loss": 0.4684, "step": 5839, "task_loss": 0.16302083432674408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.308685839176178, "epoch": 4.94, "learning_rate": 2.5316990701606085e-05, "loss": 0.4068, "step": 5840, "task_loss": 0.19114889204502106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37312954664230347, "epoch": 4.94, "learning_rate": 2.5312764158918005e-05, "loss": 0.4076, "step": 5841, "task_loss": 0.5269535183906555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.446185827255249, "epoch": 4.94, "learning_rate": 2.5308537616229928e-05, "loss": 0.4988, "step": 5842, "task_loss": 0.20380905270576477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4144616723060608, "epoch": 4.94, "learning_rate": 2.530431107354184e-05, "loss": 0.5185, "step": 5843, "task_loss": 0.38356897234916687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9075977206230164, "epoch": 4.94, "learning_rate": 2.5300084530853764e-05, "loss": 0.5909, "step": 5844, "task_loss": 1.2336329221725464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25992679595947266, "epoch": 4.94, "learning_rate": 2.5295857988165684e-05, "loss": 0.4263, "step": 5845, "task_loss": 0.09076137840747833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5832650065422058, "epoch": 4.94, "learning_rate": 2.5291631445477597e-05, "loss": 0.4467, "step": 5846, "task_loss": 0.5090808868408203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34500059485435486, "epoch": 4.94, "learning_rate": 2.528740490278952e-05, "loss": 0.5137, "step": 5847, "task_loss": 0.16382452845573425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.56105637550354, "epoch": 4.94, "learning_rate": 2.528317836010144e-05, "loss": 0.562, "step": 5848, "task_loss": 1.9156055450439453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5462300181388855, "epoch": 4.94, "learning_rate": 2.5278951817413356e-05, "loss": 0.4513, "step": 5849, "task_loss": 0.2994759678840637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45235100388526917, "epoch": 4.94, "learning_rate": 2.5274725274725276e-05, "loss": 0.5395, "step": 5850, "task_loss": 1.000086784362793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35647204518318176, "epoch": 4.95, "learning_rate": 2.5270498732037196e-05, "loss": 0.5548, "step": 5851, "task_loss": 0.16849172115325928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5011870861053467, "epoch": 4.95, "learning_rate": 2.5266272189349115e-05, "loss": 0.6782, "step": 5852, "task_loss": 1.1982852220535278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3965410888195038, "epoch": 4.95, "learning_rate": 2.526204564666103e-05, "loss": 0.4441, "step": 5853, "task_loss": 0.2527925670146942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3198370337486267, "epoch": 4.95, "learning_rate": 2.525781910397295e-05, "loss": 0.5133, "step": 5854, "task_loss": 0.5387527942657471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3331337869167328, "epoch": 4.95, "learning_rate": 2.525359256128487e-05, "loss": 0.5982, "step": 5855, "task_loss": 0.40613484382629395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35700592398643494, "epoch": 4.95, "learning_rate": 2.5249366018596787e-05, "loss": 0.4631, "step": 5856, "task_loss": 0.14375852048397064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5817124843597412, "epoch": 4.95, "learning_rate": 2.5245139475908707e-05, "loss": 0.4992, "step": 5857, "task_loss": 0.7463344931602478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6648348569869995, "epoch": 4.95, "learning_rate": 2.5240912933220627e-05, "loss": 0.6135, "step": 5858, "task_loss": 1.1142513751983643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40841737389564514, "epoch": 4.95, "learning_rate": 2.5236686390532543e-05, "loss": 0.6678, "step": 5859, "task_loss": 0.6890674829483032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47071123123168945, "epoch": 4.95, "learning_rate": 2.5232459847844463e-05, "loss": 0.3825, "step": 5860, "task_loss": 0.6409825086593628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6167083978652954, "epoch": 4.95, "learning_rate": 2.5228233305156386e-05, "loss": 0.6933, "step": 5861, "task_loss": 0.9716223478317261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28745535016059875, "epoch": 4.95, "learning_rate": 2.52240067624683e-05, "loss": 0.4906, "step": 5862, "task_loss": 0.04861461743712425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3605726659297943, "epoch": 4.96, "learning_rate": 2.521978021978022e-05, "loss": 0.5034, "step": 5863, "task_loss": 0.26824408769607544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3527558445930481, "epoch": 4.96, "learning_rate": 2.5215553677092142e-05, "loss": 0.5126, "step": 5864, "task_loss": 0.31379178166389465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35328710079193115, "epoch": 4.96, "learning_rate": 2.5211327134404062e-05, "loss": 0.5614, "step": 5865, "task_loss": 1.0871851444244385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4643189311027527, "epoch": 4.96, "learning_rate": 2.5207100591715978e-05, "loss": 0.4109, "step": 5866, "task_loss": 0.4647694230079651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48734110593795776, "epoch": 4.96, "learning_rate": 2.5202874049027898e-05, "loss": 0.5673, "step": 5867, "task_loss": 1.3344870805740356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45599859952926636, "epoch": 4.96, "learning_rate": 2.5198647506339818e-05, "loss": 0.4393, "step": 5868, "task_loss": 0.8248142600059509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7107944488525391, "epoch": 4.96, "learning_rate": 2.5194420963651734e-05, "loss": 0.687, "step": 5869, "task_loss": 1.3206456899642944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5012810230255127, "epoch": 4.96, "learning_rate": 2.5190194420963654e-05, "loss": 0.5362, "step": 5870, "task_loss": 1.2237449884414673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30335938930511475, "epoch": 4.96, "learning_rate": 2.5185967878275573e-05, "loss": 0.3399, "step": 5871, "task_loss": 0.13658776879310608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6419541835784912, "epoch": 4.96, "learning_rate": 2.518174133558749e-05, "loss": 0.5729, "step": 5872, "task_loss": 0.7049633264541626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6737095713615417, "epoch": 4.96, "learning_rate": 2.517751479289941e-05, "loss": 0.3996, "step": 5873, "task_loss": 0.5167071223258972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38763627409935, "epoch": 4.96, "learning_rate": 2.517328825021133e-05, "loss": 0.4745, "step": 5874, "task_loss": 0.4651373624801636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.506541907787323, "epoch": 4.97, "learning_rate": 2.5169061707523246e-05, "loss": 0.4901, "step": 5875, "task_loss": 0.9345484375953674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7523743510246277, "epoch": 4.97, "learning_rate": 2.5164835164835165e-05, "loss": 0.5872, "step": 5876, "task_loss": 0.2797583341598511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2746802568435669, "epoch": 4.97, "learning_rate": 2.5160608622147085e-05, "loss": 0.5039, "step": 5877, "task_loss": 0.8425626158714294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2547357678413391, "epoch": 4.97, "learning_rate": 2.5156382079459e-05, "loss": 0.5076, "step": 5878, "task_loss": 0.5859923362731934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9058804512023926, "epoch": 4.97, "learning_rate": 2.515215553677092e-05, "loss": 0.7911, "step": 5879, "task_loss": 1.2105085849761963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3001091778278351, "epoch": 4.97, "learning_rate": 2.514792899408284e-05, "loss": 0.3576, "step": 5880, "task_loss": 0.33678141236305237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5725070238113403, "epoch": 4.97, "learning_rate": 2.5143702451394764e-05, "loss": 0.6149, "step": 5881, "task_loss": 1.29972505569458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4795590937137604, "epoch": 4.97, "learning_rate": 2.5139475908706677e-05, "loss": 0.5009, "step": 5882, "task_loss": 0.8522188663482666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4290587306022644, "epoch": 4.97, "learning_rate": 2.51352493660186e-05, "loss": 0.4123, "step": 5883, "task_loss": 0.7262642979621887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4498727321624756, "epoch": 4.97, "learning_rate": 2.513102282333052e-05, "loss": 0.5662, "step": 5884, "task_loss": 0.328357070684433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4463154673576355, "epoch": 4.97, "learning_rate": 2.5126796280642433e-05, "loss": 0.4233, "step": 5885, "task_loss": 0.628167986869812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4322727918624878, "epoch": 4.97, "learning_rate": 2.5122569737954356e-05, "loss": 0.3901, "step": 5886, "task_loss": 0.5687154531478882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.387350469827652, "epoch": 4.98, "learning_rate": 2.5118343195266276e-05, "loss": 0.4297, "step": 5887, "task_loss": 0.3951967656612396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9616945385932922, "epoch": 4.98, "learning_rate": 2.511411665257819e-05, "loss": 0.5759, "step": 5888, "task_loss": 0.7422794699668884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2975861728191376, "epoch": 4.98, "learning_rate": 2.5109890109890112e-05, "loss": 0.4499, "step": 5889, "task_loss": 0.2681705951690674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6026228666305542, "epoch": 4.98, "learning_rate": 2.510566356720203e-05, "loss": 0.4986, "step": 5890, "task_loss": 0.708349347114563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3391422927379608, "epoch": 4.98, "learning_rate": 2.5101437024513948e-05, "loss": 0.4834, "step": 5891, "task_loss": 0.35420122742652893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5993404388427734, "epoch": 4.98, "learning_rate": 2.5097210481825868e-05, "loss": 0.6312, "step": 5892, "task_loss": 0.2654555141925812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5072178840637207, "epoch": 4.98, "learning_rate": 2.5092983939137787e-05, "loss": 0.4789, "step": 5893, "task_loss": 0.23766012489795685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8217976689338684, "epoch": 4.98, "learning_rate": 2.5088757396449707e-05, "loss": 0.5146, "step": 5894, "task_loss": 1.3266856670379639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3787155747413635, "epoch": 4.98, "learning_rate": 2.5084530853761623e-05, "loss": 0.5612, "step": 5895, "task_loss": 0.7959149479866028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7610986232757568, "epoch": 4.98, "learning_rate": 2.5080304311073543e-05, "loss": 0.5394, "step": 5896, "task_loss": 0.5654633641242981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4275207221508026, "epoch": 4.98, "learning_rate": 2.5076077768385463e-05, "loss": 0.4836, "step": 5897, "task_loss": 0.4208388030529022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3316773772239685, "epoch": 4.99, "learning_rate": 2.507185122569738e-05, "loss": 0.3813, "step": 5898, "task_loss": 0.47392022609710693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5100721716880798, "epoch": 4.99, "learning_rate": 2.50676246830093e-05, "loss": 0.4499, "step": 5899, "task_loss": 0.7384151220321655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7506421804428101, "epoch": 4.99, "learning_rate": 2.506339814032122e-05, "loss": 0.5957, "step": 5900, "task_loss": 0.9187194108963013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42261719703674316, "epoch": 4.99, "learning_rate": 2.5059171597633135e-05, "loss": 0.4444, "step": 5901, "task_loss": 0.4331877827644348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8414658904075623, "epoch": 4.99, "learning_rate": 2.5054945054945055e-05, "loss": 0.6473, "step": 5902, "task_loss": 1.207355260848999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6883753538131714, "epoch": 4.99, "learning_rate": 2.5050718512256978e-05, "loss": 0.5487, "step": 5903, "task_loss": 0.25709763169288635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48178380727767944, "epoch": 4.99, "learning_rate": 2.504649196956889e-05, "loss": 0.4289, "step": 5904, "task_loss": 0.5272418856620789 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5265001058578491, "epoch": 4.99, "learning_rate": 2.504226542688081e-05, "loss": 0.4962, "step": 5905, "task_loss": 0.6459428668022156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34180378913879395, "epoch": 4.99, "learning_rate": 2.5038038884192734e-05, "loss": 0.4022, "step": 5906, "task_loss": 0.6246843338012695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5612274408340454, "epoch": 4.99, "learning_rate": 2.5033812341504647e-05, "loss": 0.6222, "step": 5907, "task_loss": 0.9123996496200562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5337389707565308, "epoch": 4.99, "learning_rate": 2.502958579881657e-05, "loss": 0.6182, "step": 5908, "task_loss": 0.5444350242614746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4110601246356964, "epoch": 4.99, "learning_rate": 2.502535925612849e-05, "loss": 0.554, "step": 5909, "task_loss": 1.1575924158096313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7683933973312378, "epoch": 5.0, "learning_rate": 2.502113271344041e-05, "loss": 0.6302, "step": 5910, "task_loss": 0.9224808216094971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31058940291404724, "epoch": 5.0, "learning_rate": 2.5016906170752326e-05, "loss": 0.417, "step": 5911, "task_loss": 1.0095516443252563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8734809756278992, "epoch": 5.0, "learning_rate": 2.5012679628064245e-05, "loss": 0.5721, "step": 5912, "task_loss": 0.343243271112442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5232601165771484, "epoch": 5.0, "learning_rate": 2.5008453085376165e-05, "loss": 0.4468, "step": 5913, "task_loss": 0.28599193692207336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5779330134391785, "epoch": 5.0, "learning_rate": 2.500422654268808e-05, "loss": 0.4849, "step": 5914, "task_loss": 0.6249581575393677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36032405495643616, "epoch": 5.0, "learning_rate": 2.5e-05, "loss": 0.4809, "step": 5915, "task_loss": 0.5216179490089417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6082985401153564, "epoch": 5.0, "learning_rate": 2.4995773457311918e-05, "loss": 0.9102, "step": 5916, "task_loss": 0.6683598160743713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8055363893508911, "epoch": 5.0, "learning_rate": 2.499154691462384e-05, "loss": 0.492, "step": 5917, "task_loss": 0.9046040177345276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5577858686447144, "epoch": 5.0, "learning_rate": 2.4987320371935757e-05, "loss": 0.5702, "step": 5918, "task_loss": 1.107552170753479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33932915329933167, "epoch": 5.0, "learning_rate": 2.4983093829247677e-05, "loss": 0.4536, "step": 5919, "task_loss": 0.34433531761169434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.719271183013916, "epoch": 5.0, "learning_rate": 2.4978867286559597e-05, "loss": 0.6003, "step": 5920, "task_loss": 0.6446596384048462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7384810447692871, "epoch": 5.01, "learning_rate": 2.4974640743871513e-05, "loss": 0.5199, "step": 5921, "task_loss": 0.3789003789424896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3647576868534088, "epoch": 5.01, "learning_rate": 2.4970414201183433e-05, "loss": 0.5021, "step": 5922, "task_loss": 0.6205793619155884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6078684329986572, "epoch": 5.01, "learning_rate": 2.4966187658495352e-05, "loss": 0.6143, "step": 5923, "task_loss": 0.6133117079734802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3312162458896637, "epoch": 5.01, "learning_rate": 2.496196111580727e-05, "loss": 0.3951, "step": 5924, "task_loss": 0.554440975189209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5120965242385864, "epoch": 5.01, "learning_rate": 2.4957734573119192e-05, "loss": 0.4515, "step": 5925, "task_loss": 0.6741136312484741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25445348024368286, "epoch": 5.01, "learning_rate": 2.4953508030431108e-05, "loss": 0.4508, "step": 5926, "task_loss": 0.8903825283050537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3473491370677948, "epoch": 5.01, "learning_rate": 2.4949281487743028e-05, "loss": 0.3818, "step": 5927, "task_loss": 1.3895976543426514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33885765075683594, "epoch": 5.01, "learning_rate": 2.4945054945054948e-05, "loss": 0.4322, "step": 5928, "task_loss": 0.08260716497898102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5117425918579102, "epoch": 5.01, "learning_rate": 2.4940828402366864e-05, "loss": 0.3731, "step": 5929, "task_loss": 0.23022602498531342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3977622091770172, "epoch": 5.01, "learning_rate": 2.4936601859678784e-05, "loss": 0.479, "step": 5930, "task_loss": 0.2684060335159302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.608170211315155, "epoch": 5.01, "learning_rate": 2.4932375316990703e-05, "loss": 0.5224, "step": 5931, "task_loss": 1.3077064752578735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2939225435256958, "epoch": 5.01, "learning_rate": 2.492814877430262e-05, "loss": 0.3852, "step": 5932, "task_loss": 0.43751248717308044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42494404315948486, "epoch": 5.02, "learning_rate": 2.492392223161454e-05, "loss": 0.4691, "step": 5933, "task_loss": 1.1505721807479858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19124269485473633, "epoch": 5.02, "learning_rate": 2.491969568892646e-05, "loss": 0.4465, "step": 5934, "task_loss": 0.4035099744796753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6567538976669312, "epoch": 5.02, "learning_rate": 2.491546914623838e-05, "loss": 0.431, "step": 5935, "task_loss": 0.4618139863014221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3672664165496826, "epoch": 5.02, "learning_rate": 2.49112426035503e-05, "loss": 0.4516, "step": 5936, "task_loss": 0.14847822487354279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3138543367385864, "epoch": 5.02, "learning_rate": 2.4907016060862215e-05, "loss": 0.3702, "step": 5937, "task_loss": 0.53508061170578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42156708240509033, "epoch": 5.02, "learning_rate": 2.4902789518174135e-05, "loss": 0.456, "step": 5938, "task_loss": 0.22390834987163544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6893063187599182, "epoch": 5.02, "learning_rate": 2.4898562975486055e-05, "loss": 0.546, "step": 5939, "task_loss": 1.4197015762329102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7896542549133301, "epoch": 5.02, "learning_rate": 2.489433643279797e-05, "loss": 0.5581, "step": 5940, "task_loss": 0.5447805523872375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5559636354446411, "epoch": 5.02, "learning_rate": 2.489010989010989e-05, "loss": 0.5189, "step": 5941, "task_loss": 1.4816220998764038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7295690774917603, "epoch": 5.02, "learning_rate": 2.488588334742181e-05, "loss": 0.5012, "step": 5942, "task_loss": 0.8214719295501709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3193829655647278, "epoch": 5.02, "learning_rate": 2.488165680473373e-05, "loss": 0.5085, "step": 5943, "task_loss": 0.42772430181503296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41968590021133423, "epoch": 5.02, "learning_rate": 2.4877430262045647e-05, "loss": 0.5842, "step": 5944, "task_loss": 0.9024659395217896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5182512998580933, "epoch": 5.03, "learning_rate": 2.4873203719357566e-05, "loss": 0.5233, "step": 5945, "task_loss": 0.6191232800483704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7017192244529724, "epoch": 5.03, "learning_rate": 2.4868977176669486e-05, "loss": 0.4193, "step": 5946, "task_loss": 0.28791308403015137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37173032760620117, "epoch": 5.03, "learning_rate": 2.4864750633981402e-05, "loss": 0.587, "step": 5947, "task_loss": 0.6532882452011108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24450619518756866, "epoch": 5.03, "learning_rate": 2.4860524091293325e-05, "loss": 0.429, "step": 5948, "task_loss": 1.1670103073120117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2502221465110779, "epoch": 5.03, "learning_rate": 2.4856297548605242e-05, "loss": 0.4137, "step": 5949, "task_loss": 0.3373726010322571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42184120416641235, "epoch": 5.03, "learning_rate": 2.485207100591716e-05, "loss": 0.4319, "step": 5950, "task_loss": 0.5769553184509277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3875751495361328, "epoch": 5.03, "learning_rate": 2.484784446322908e-05, "loss": 0.5062, "step": 5951, "task_loss": 0.9066849946975708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7745203971862793, "epoch": 5.03, "learning_rate": 2.4843617920540998e-05, "loss": 0.5165, "step": 5952, "task_loss": 0.45830780267715454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.358915239572525, "epoch": 5.03, "learning_rate": 2.4839391377852917e-05, "loss": 0.4424, "step": 5953, "task_loss": 0.9698063731193542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42141085863113403, "epoch": 5.03, "learning_rate": 2.4835164835164837e-05, "loss": 0.4029, "step": 5954, "task_loss": 0.37209227681159973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8922230005264282, "epoch": 5.03, "learning_rate": 2.4830938292476753e-05, "loss": 0.6084, "step": 5955, "task_loss": 0.8888617753982544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4024159014225006, "epoch": 5.03, "learning_rate": 2.4826711749788677e-05, "loss": 0.3417, "step": 5956, "task_loss": 0.3548760712146759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25430941581726074, "epoch": 5.04, "learning_rate": 2.4822485207100593e-05, "loss": 0.3777, "step": 5957, "task_loss": 0.1606295108795166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4838330149650574, "epoch": 5.04, "learning_rate": 2.481825866441251e-05, "loss": 0.4848, "step": 5958, "task_loss": 0.4661611020565033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43831878900527954, "epoch": 5.04, "learning_rate": 2.4814032121724432e-05, "loss": 0.4736, "step": 5959, "task_loss": 1.5398967266082764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3489435315132141, "epoch": 5.04, "learning_rate": 2.480980557903635e-05, "loss": 0.4425, "step": 5960, "task_loss": 0.31673723459243774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31559956073760986, "epoch": 5.04, "learning_rate": 2.480557903634827e-05, "loss": 0.5137, "step": 5961, "task_loss": 0.19639374315738678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6046870946884155, "epoch": 5.04, "learning_rate": 2.4801352493660188e-05, "loss": 0.5283, "step": 5962, "task_loss": 0.6970154047012329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7100826501846313, "epoch": 5.04, "learning_rate": 2.4797125950972105e-05, "loss": 0.5779, "step": 5963, "task_loss": 1.6279972791671753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5161731839179993, "epoch": 5.04, "learning_rate": 2.4792899408284024e-05, "loss": 0.5408, "step": 5964, "task_loss": 0.6651297211647034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5025107860565186, "epoch": 5.04, "learning_rate": 2.4788672865595944e-05, "loss": 0.4349, "step": 5965, "task_loss": 1.3200857639312744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.720709502696991, "epoch": 5.04, "learning_rate": 2.478444632290786e-05, "loss": 0.704, "step": 5966, "task_loss": 1.0721192359924316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4754113554954529, "epoch": 5.04, "learning_rate": 2.4780219780219784e-05, "loss": 0.457, "step": 5967, "task_loss": 0.7590563297271729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5752441883087158, "epoch": 5.04, "learning_rate": 2.47759932375317e-05, "loss": 0.5556, "step": 5968, "task_loss": 0.9572807550430298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6993789672851562, "epoch": 5.05, "learning_rate": 2.4771766694843616e-05, "loss": 0.5985, "step": 5969, "task_loss": 1.6071454286575317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2880752682685852, "epoch": 5.05, "learning_rate": 2.476754015215554e-05, "loss": 0.5232, "step": 5970, "task_loss": 0.16263897716999054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6551597714424133, "epoch": 5.05, "learning_rate": 2.4763313609467456e-05, "loss": 0.4821, "step": 5971, "task_loss": 1.11124587059021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44097816944122314, "epoch": 5.05, "learning_rate": 2.4759087066779375e-05, "loss": 0.5146, "step": 5972, "task_loss": 0.882554292678833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35184478759765625, "epoch": 5.05, "learning_rate": 2.4754860524091295e-05, "loss": 0.3635, "step": 5973, "task_loss": 0.6743343472480774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5512925386428833, "epoch": 5.05, "learning_rate": 2.475063398140321e-05, "loss": 0.5301, "step": 5974, "task_loss": 0.5781324505805969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6125876903533936, "epoch": 5.05, "learning_rate": 2.474640743871513e-05, "loss": 0.4122, "step": 5975, "task_loss": 0.685910701751709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5098785758018494, "epoch": 5.05, "learning_rate": 2.474218089602705e-05, "loss": 0.4769, "step": 5976, "task_loss": 1.2178208827972412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45047181844711304, "epoch": 5.05, "learning_rate": 2.473795435333897e-05, "loss": 0.5641, "step": 5977, "task_loss": 0.36383482813835144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40200772881507874, "epoch": 5.05, "learning_rate": 2.473372781065089e-05, "loss": 0.4326, "step": 5978, "task_loss": 0.18079520761966705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33880242705345154, "epoch": 5.05, "learning_rate": 2.4729501267962807e-05, "loss": 0.3363, "step": 5979, "task_loss": 0.3427959084510803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33176741003990173, "epoch": 5.05, "learning_rate": 2.4725274725274727e-05, "loss": 0.538, "step": 5980, "task_loss": 0.29471755027770996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.460461288690567, "epoch": 5.06, "learning_rate": 2.4721048182586646e-05, "loss": 0.4772, "step": 5981, "task_loss": 1.1523021459579468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48022761940956116, "epoch": 5.06, "learning_rate": 2.4716821639898563e-05, "loss": 0.5004, "step": 5982, "task_loss": 0.948471188545227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37389588356018066, "epoch": 5.06, "learning_rate": 2.4712595097210482e-05, "loss": 0.4084, "step": 5983, "task_loss": 0.3665182590484619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.589600682258606, "epoch": 5.06, "learning_rate": 2.4708368554522402e-05, "loss": 0.6166, "step": 5984, "task_loss": 0.78522789478302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6796927452087402, "epoch": 5.06, "learning_rate": 2.4704142011834322e-05, "loss": 0.5379, "step": 5985, "task_loss": 1.232219934463501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4793018698692322, "epoch": 5.06, "learning_rate": 2.4699915469146238e-05, "loss": 0.4681, "step": 5986, "task_loss": 0.5304792523384094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.546089768409729, "epoch": 5.06, "learning_rate": 2.4695688926458158e-05, "loss": 0.6239, "step": 5987, "task_loss": 2.0606584548950195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5196162462234497, "epoch": 5.06, "learning_rate": 2.4691462383770078e-05, "loss": 0.4726, "step": 5988, "task_loss": 0.33798322081565857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.391682505607605, "epoch": 5.06, "learning_rate": 2.4687235841081997e-05, "loss": 0.4453, "step": 5989, "task_loss": 1.0684736967086792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5201181769371033, "epoch": 5.06, "learning_rate": 2.4683009298393914e-05, "loss": 0.5392, "step": 5990, "task_loss": 0.6769152879714966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4350537955760956, "epoch": 5.06, "learning_rate": 2.4678782755705834e-05, "loss": 0.5484, "step": 5991, "task_loss": 1.0718979835510254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.280165433883667, "epoch": 5.07, "learning_rate": 2.4674556213017753e-05, "loss": 0.3574, "step": 5992, "task_loss": 0.17201143503189087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5555096864700317, "epoch": 5.07, "learning_rate": 2.4670329670329673e-05, "loss": 0.5498, "step": 5993, "task_loss": 1.1042184829711914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.951995849609375, "epoch": 5.07, "learning_rate": 2.466610312764159e-05, "loss": 0.5201, "step": 5994, "task_loss": 1.2007079124450684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.345053493976593, "epoch": 5.07, "learning_rate": 2.466187658495351e-05, "loss": 0.5336, "step": 5995, "task_loss": 0.9534551501274109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34661632776260376, "epoch": 5.07, "learning_rate": 2.465765004226543e-05, "loss": 0.4346, "step": 5996, "task_loss": 0.8331199884414673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28205406665802, "epoch": 5.07, "learning_rate": 2.4653423499577345e-05, "loss": 0.4945, "step": 5997, "task_loss": 0.09619945287704468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3882964253425598, "epoch": 5.07, "learning_rate": 2.4649196956889265e-05, "loss": 0.4401, "step": 5998, "task_loss": 0.10834480822086334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39687180519104004, "epoch": 5.07, "learning_rate": 2.4644970414201185e-05, "loss": 0.3267, "step": 5999, "task_loss": 0.514348030090332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3934389352798462, "epoch": 5.07, "learning_rate": 2.4640743871513104e-05, "loss": 0.5421, "step": 6000, "task_loss": 0.18161803483963013 }, { "epoch": 5.07, "eval_accuracy": 0.9048712871287129, "eval_loss": 0.3182193338871002, "eval_runtime": 230.0646, "eval_samples_per_second": 109.752, "eval_steps_per_second": 0.861, "step": 6000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5449993014335632, "epoch": 5.07, "learning_rate": 2.4636517328825024e-05, "loss": 0.4568, "step": 6001, "task_loss": 1.0059036016464233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33474215865135193, "epoch": 5.07, "learning_rate": 2.463229078613694e-05, "loss": 0.5353, "step": 6002, "task_loss": 1.0115423202514648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4833511710166931, "epoch": 5.07, "learning_rate": 2.462806424344886e-05, "loss": 0.4584, "step": 6003, "task_loss": 0.12505419552326202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5871100425720215, "epoch": 5.08, "learning_rate": 2.462383770076078e-05, "loss": 0.4213, "step": 6004, "task_loss": 1.026135802268982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49546924233436584, "epoch": 5.08, "learning_rate": 2.4619611158072696e-05, "loss": 0.4073, "step": 6005, "task_loss": 0.8182092905044556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38001054525375366, "epoch": 5.08, "learning_rate": 2.461538461538462e-05, "loss": 0.3664, "step": 6006, "task_loss": 0.4444688558578491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46788477897644043, "epoch": 5.08, "learning_rate": 2.4611158072696536e-05, "loss": 0.3951, "step": 6007, "task_loss": 0.8514207601547241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3878398537635803, "epoch": 5.08, "learning_rate": 2.4606931530008452e-05, "loss": 0.3701, "step": 6008, "task_loss": 0.42015862464904785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43556442856788635, "epoch": 5.08, "learning_rate": 2.4602704987320375e-05, "loss": 0.4738, "step": 6009, "task_loss": 0.9199793934822083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32857829332351685, "epoch": 5.08, "learning_rate": 2.459847844463229e-05, "loss": 0.565, "step": 6010, "task_loss": 0.17003366351127625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5306485891342163, "epoch": 5.08, "learning_rate": 2.4594251901944208e-05, "loss": 0.5209, "step": 6011, "task_loss": 0.6331378221511841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4917563796043396, "epoch": 5.08, "learning_rate": 2.459002535925613e-05, "loss": 0.5475, "step": 6012, "task_loss": 0.40922629833221436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7640315294265747, "epoch": 5.08, "learning_rate": 2.4585798816568047e-05, "loss": 0.5034, "step": 6013, "task_loss": 1.272016167640686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3246115446090698, "epoch": 5.08, "learning_rate": 2.4581572273879967e-05, "loss": 0.4078, "step": 6014, "task_loss": 0.1390765905380249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4721505641937256, "epoch": 5.08, "learning_rate": 2.4577345731191887e-05, "loss": 0.3577, "step": 6015, "task_loss": 0.20935770869255066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6544747948646545, "epoch": 5.09, "learning_rate": 2.4573119188503803e-05, "loss": 0.424, "step": 6016, "task_loss": 0.7490522861480713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48439788818359375, "epoch": 5.09, "learning_rate": 2.4568892645815726e-05, "loss": 0.5286, "step": 6017, "task_loss": 0.4856950342655182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3707444667816162, "epoch": 5.09, "learning_rate": 2.4564666103127643e-05, "loss": 0.438, "step": 6018, "task_loss": 0.7697510123252869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31932902336120605, "epoch": 5.09, "learning_rate": 2.456043956043956e-05, "loss": 0.5142, "step": 6019, "task_loss": 0.10141529142856598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5338470339775085, "epoch": 5.09, "learning_rate": 2.4556213017751482e-05, "loss": 0.6036, "step": 6020, "task_loss": 0.7379311323165894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8293426036834717, "epoch": 5.09, "learning_rate": 2.45519864750634e-05, "loss": 0.5958, "step": 6021, "task_loss": 1.1147619485855103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35722851753234863, "epoch": 5.09, "learning_rate": 2.454775993237532e-05, "loss": 0.4757, "step": 6022, "task_loss": 0.789341390132904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6629540920257568, "epoch": 5.09, "learning_rate": 2.4543533389687238e-05, "loss": 0.6033, "step": 6023, "task_loss": 0.4335033595561981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4034222364425659, "epoch": 5.09, "learning_rate": 2.4539306846999154e-05, "loss": 0.4392, "step": 6024, "task_loss": 0.09940929710865021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6093487739562988, "epoch": 5.09, "learning_rate": 2.4535080304311074e-05, "loss": 0.5179, "step": 6025, "task_loss": 0.6633531451225281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4297054409980774, "epoch": 5.09, "learning_rate": 2.4530853761622994e-05, "loss": 0.3693, "step": 6026, "task_loss": 0.3079739809036255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46619713306427, "epoch": 5.09, "learning_rate": 2.452662721893491e-05, "loss": 0.543, "step": 6027, "task_loss": 0.1680336594581604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5810904502868652, "epoch": 5.1, "learning_rate": 2.452240067624683e-05, "loss": 0.5541, "step": 6028, "task_loss": 1.3454046249389648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30286654829978943, "epoch": 5.1, "learning_rate": 2.451817413355875e-05, "loss": 0.6656, "step": 6029, "task_loss": 0.5029436945915222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7220510244369507, "epoch": 5.1, "learning_rate": 2.451394759087067e-05, "loss": 0.4913, "step": 6030, "task_loss": 0.6276025772094727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37081706523895264, "epoch": 5.1, "learning_rate": 2.450972104818259e-05, "loss": 0.3449, "step": 6031, "task_loss": 0.11580011248588562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4230744540691376, "epoch": 5.1, "learning_rate": 2.4505494505494506e-05, "loss": 0.4936, "step": 6032, "task_loss": 0.4627753496170044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46361660957336426, "epoch": 5.1, "learning_rate": 2.4501267962806425e-05, "loss": 0.4934, "step": 6033, "task_loss": 1.1228450536727905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22709789872169495, "epoch": 5.1, "learning_rate": 2.4497041420118345e-05, "loss": 0.3665, "step": 6034, "task_loss": 0.06817236542701721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3843761086463928, "epoch": 5.1, "learning_rate": 2.4492814877430265e-05, "loss": 0.4308, "step": 6035, "task_loss": 0.8635696172714233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4158199429512024, "epoch": 5.1, "learning_rate": 2.448858833474218e-05, "loss": 0.6124, "step": 6036, "task_loss": 0.39460551738739014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5129265785217285, "epoch": 5.1, "learning_rate": 2.44843617920541e-05, "loss": 0.4993, "step": 6037, "task_loss": 0.7277722358703613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4055904746055603, "epoch": 5.1, "learning_rate": 2.448013524936602e-05, "loss": 0.5222, "step": 6038, "task_loss": 0.611846387386322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21709352731704712, "epoch": 5.1, "learning_rate": 2.4475908706677937e-05, "loss": 0.387, "step": 6039, "task_loss": 0.505352258682251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3227002024650574, "epoch": 5.11, "learning_rate": 2.4471682163989857e-05, "loss": 0.3806, "step": 6040, "task_loss": 0.4577132761478424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37678706645965576, "epoch": 5.11, "learning_rate": 2.4467455621301776e-05, "loss": 0.6274, "step": 6041, "task_loss": 0.09494077414274216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5429909229278564, "epoch": 5.11, "learning_rate": 2.4463229078613696e-05, "loss": 0.4627, "step": 6042, "task_loss": 0.44852563738822937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35085201263427734, "epoch": 5.11, "learning_rate": 2.4459002535925616e-05, "loss": 0.3641, "step": 6043, "task_loss": 0.9549075365066528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41243740916252136, "epoch": 5.11, "learning_rate": 2.4454775993237532e-05, "loss": 0.4943, "step": 6044, "task_loss": 0.877136766910553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4605540633201599, "epoch": 5.11, "learning_rate": 2.4450549450549452e-05, "loss": 0.5337, "step": 6045, "task_loss": 0.8325431942939758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31457799673080444, "epoch": 5.11, "learning_rate": 2.4446322907861372e-05, "loss": 0.5164, "step": 6046, "task_loss": 1.3069870471954346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45893871784210205, "epoch": 5.11, "learning_rate": 2.4442096365173288e-05, "loss": 0.5375, "step": 6047, "task_loss": 0.40529030561447144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31940460205078125, "epoch": 5.11, "learning_rate": 2.4437869822485208e-05, "loss": 0.5391, "step": 6048, "task_loss": 0.14863824844360352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49545902013778687, "epoch": 5.11, "learning_rate": 2.4433643279797128e-05, "loss": 0.4807, "step": 6049, "task_loss": 0.9401209354400635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3187169134616852, "epoch": 5.11, "learning_rate": 2.4429416737109044e-05, "loss": 0.4381, "step": 6050, "task_loss": 0.8888756632804871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42782485485076904, "epoch": 5.11, "learning_rate": 2.4425190194420967e-05, "loss": 0.4939, "step": 6051, "task_loss": 0.9968992471694946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5566502213478088, "epoch": 5.12, "learning_rate": 2.4420963651732883e-05, "loss": 0.4957, "step": 6052, "task_loss": 0.7148687839508057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47554680705070496, "epoch": 5.12, "learning_rate": 2.4416737109044803e-05, "loss": 0.5468, "step": 6053, "task_loss": 0.8989934325218201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7137382626533508, "epoch": 5.12, "learning_rate": 2.4412510566356723e-05, "loss": 0.5279, "step": 6054, "task_loss": 1.0508335828781128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48718878626823425, "epoch": 5.12, "learning_rate": 2.440828402366864e-05, "loss": 0.5429, "step": 6055, "task_loss": 1.394580602645874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3145267367362976, "epoch": 5.12, "learning_rate": 2.440405748098056e-05, "loss": 0.4607, "step": 6056, "task_loss": 0.2770923674106598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6932052969932556, "epoch": 5.12, "learning_rate": 2.439983093829248e-05, "loss": 0.4571, "step": 6057, "task_loss": 0.6293013095855713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4722960889339447, "epoch": 5.12, "learning_rate": 2.4395604395604395e-05, "loss": 0.4686, "step": 6058, "task_loss": 0.44843149185180664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49339139461517334, "epoch": 5.12, "learning_rate": 2.4391377852916318e-05, "loss": 0.4539, "step": 6059, "task_loss": 0.6607227325439453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3622037172317505, "epoch": 5.12, "learning_rate": 2.4387151310228235e-05, "loss": 0.4488, "step": 6060, "task_loss": 0.2901977598667145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5500739812850952, "epoch": 5.12, "learning_rate": 2.438292476754015e-05, "loss": 0.7224, "step": 6061, "task_loss": 2.1479337215423584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2459859549999237, "epoch": 5.12, "learning_rate": 2.4378698224852074e-05, "loss": 0.3924, "step": 6062, "task_loss": 0.14313580095767975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3563840091228485, "epoch": 5.13, "learning_rate": 2.437447168216399e-05, "loss": 0.4015, "step": 6063, "task_loss": 0.1891101449728012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6395384669303894, "epoch": 5.13, "learning_rate": 2.437024513947591e-05, "loss": 0.4475, "step": 6064, "task_loss": 0.6162099838256836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5693876147270203, "epoch": 5.13, "learning_rate": 2.436601859678783e-05, "loss": 0.4507, "step": 6065, "task_loss": 0.4233049154281616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4631159007549286, "epoch": 5.13, "learning_rate": 2.4361792054099746e-05, "loss": 0.4983, "step": 6066, "task_loss": 0.49086061120033264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46824151277542114, "epoch": 5.13, "learning_rate": 2.4357565511411666e-05, "loss": 0.6295, "step": 6067, "task_loss": 1.0286892652511597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33869636058807373, "epoch": 5.13, "learning_rate": 2.4353338968723586e-05, "loss": 0.588, "step": 6068, "task_loss": 0.5210633873939514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3325342535972595, "epoch": 5.13, "learning_rate": 2.4349112426035502e-05, "loss": 0.5058, "step": 6069, "task_loss": 0.29091042280197144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4005863070487976, "epoch": 5.13, "learning_rate": 2.4344885883347425e-05, "loss": 0.5489, "step": 6070, "task_loss": 0.5245393514633179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6486250162124634, "epoch": 5.13, "learning_rate": 2.434065934065934e-05, "loss": 0.5983, "step": 6071, "task_loss": 1.8012388944625854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5699920058250427, "epoch": 5.13, "learning_rate": 2.433643279797126e-05, "loss": 0.6327, "step": 6072, "task_loss": 1.0488325357437134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.339619517326355, "epoch": 5.13, "learning_rate": 2.433220625528318e-05, "loss": 0.3847, "step": 6073, "task_loss": 0.24479015171527863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44702813029289246, "epoch": 5.13, "learning_rate": 2.4327979712595097e-05, "loss": 0.4508, "step": 6074, "task_loss": 0.9232329726219177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6177733540534973, "epoch": 5.14, "learning_rate": 2.4323753169907017e-05, "loss": 0.6332, "step": 6075, "task_loss": 0.6015790700912476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2910100817680359, "epoch": 5.14, "learning_rate": 2.4319526627218937e-05, "loss": 0.4757, "step": 6076, "task_loss": 0.037308115512132645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6398335695266724, "epoch": 5.14, "learning_rate": 2.4315300084530853e-05, "loss": 0.4791, "step": 6077, "task_loss": 1.3564711809158325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5828233957290649, "epoch": 5.14, "learning_rate": 2.4311073541842773e-05, "loss": 0.5594, "step": 6078, "task_loss": 1.3108546733856201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4572860896587372, "epoch": 5.14, "learning_rate": 2.4306846999154693e-05, "loss": 0.5371, "step": 6079, "task_loss": 0.5129013061523438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31945663690567017, "epoch": 5.14, "learning_rate": 2.4302620456466612e-05, "loss": 0.4847, "step": 6080, "task_loss": 0.2803064286708832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35488054156303406, "epoch": 5.14, "learning_rate": 2.4298393913778532e-05, "loss": 0.4644, "step": 6081, "task_loss": 0.09514985233545303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2647809088230133, "epoch": 5.14, "learning_rate": 2.429416737109045e-05, "loss": 0.4341, "step": 6082, "task_loss": 0.7444067001342773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37407463788986206, "epoch": 5.14, "learning_rate": 2.4289940828402368e-05, "loss": 0.5926, "step": 6083, "task_loss": 0.8440563678741455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33316361904144287, "epoch": 5.14, "learning_rate": 2.4285714285714288e-05, "loss": 0.42, "step": 6084, "task_loss": 0.5290902853012085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20633062720298767, "epoch": 5.14, "learning_rate": 2.4281487743026204e-05, "loss": 0.644, "step": 6085, "task_loss": 0.6980563402175903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4087545573711395, "epoch": 5.14, "learning_rate": 2.4277261200338124e-05, "loss": 0.5343, "step": 6086, "task_loss": 0.6245479583740234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8807015419006348, "epoch": 5.15, "learning_rate": 2.4273034657650044e-05, "loss": 0.6492, "step": 6087, "task_loss": 0.5944237112998962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7398768663406372, "epoch": 5.15, "learning_rate": 2.4268808114961964e-05, "loss": 0.624, "step": 6088, "task_loss": 0.6951228380203247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7016762495040894, "epoch": 5.15, "learning_rate": 2.426458157227388e-05, "loss": 0.4959, "step": 6089, "task_loss": 0.6524354219436646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4098488688468933, "epoch": 5.15, "learning_rate": 2.42603550295858e-05, "loss": 0.4084, "step": 6090, "task_loss": 0.17840828001499176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33483564853668213, "epoch": 5.15, "learning_rate": 2.425612848689772e-05, "loss": 0.4244, "step": 6091, "task_loss": 0.4534497857093811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.500361979007721, "epoch": 5.15, "learning_rate": 2.4251901944209636e-05, "loss": 0.4635, "step": 6092, "task_loss": 0.5212643146514893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49990272521972656, "epoch": 5.15, "learning_rate": 2.4247675401521555e-05, "loss": 0.6138, "step": 6093, "task_loss": 0.792811393737793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36293935775756836, "epoch": 5.15, "learning_rate": 2.4243448858833475e-05, "loss": 0.4638, "step": 6094, "task_loss": 0.29829829931259155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4958467483520508, "epoch": 5.15, "learning_rate": 2.4239222316145395e-05, "loss": 0.5225, "step": 6095, "task_loss": 0.779597818851471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5758122205734253, "epoch": 5.15, "learning_rate": 2.4234995773457315e-05, "loss": 0.6499, "step": 6096, "task_loss": 1.5735408067703247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4639711380004883, "epoch": 5.15, "learning_rate": 2.423076923076923e-05, "loss": 0.4198, "step": 6097, "task_loss": 0.8647044897079468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4379214644432068, "epoch": 5.15, "learning_rate": 2.422654268808115e-05, "loss": 0.6026, "step": 6098, "task_loss": 1.3584641218185425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5387359857559204, "epoch": 5.16, "learning_rate": 2.422231614539307e-05, "loss": 0.5422, "step": 6099, "task_loss": 0.8965391516685486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27668148279190063, "epoch": 5.16, "learning_rate": 2.4218089602704987e-05, "loss": 0.5114, "step": 6100, "task_loss": 0.42457109689712524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7864424586296082, "epoch": 5.16, "learning_rate": 2.421386306001691e-05, "loss": 0.6489, "step": 6101, "task_loss": 2.0635387897491455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4932714104652405, "epoch": 5.16, "learning_rate": 2.4209636517328826e-05, "loss": 0.5737, "step": 6102, "task_loss": 0.30863773822784424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5333161354064941, "epoch": 5.16, "learning_rate": 2.4205409974640743e-05, "loss": 0.5204, "step": 6103, "task_loss": 1.0516878366470337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3094266355037689, "epoch": 5.16, "learning_rate": 2.4201183431952666e-05, "loss": 0.3886, "step": 6104, "task_loss": 0.4357942044734955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6896642446517944, "epoch": 5.16, "learning_rate": 2.4196956889264582e-05, "loss": 0.448, "step": 6105, "task_loss": 0.25991523265838623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7091655135154724, "epoch": 5.16, "learning_rate": 2.4192730346576502e-05, "loss": 0.5194, "step": 6106, "task_loss": 0.48243609070777893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32731813192367554, "epoch": 5.16, "learning_rate": 2.418850380388842e-05, "loss": 0.4597, "step": 6107, "task_loss": 0.7372286319732666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36532658338546753, "epoch": 5.16, "learning_rate": 2.4184277261200338e-05, "loss": 0.5513, "step": 6108, "task_loss": 0.5837984681129456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36492544412612915, "epoch": 5.16, "learning_rate": 2.4180050718512258e-05, "loss": 0.5102, "step": 6109, "task_loss": 1.5129467248916626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4459283947944641, "epoch": 5.16, "learning_rate": 2.4175824175824177e-05, "loss": 0.3916, "step": 6110, "task_loss": 1.0544507503509521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44582539796829224, "epoch": 5.17, "learning_rate": 2.4171597633136094e-05, "loss": 0.5348, "step": 6111, "task_loss": 0.522088885307312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5459232926368713, "epoch": 5.17, "learning_rate": 2.4167371090448017e-05, "loss": 0.5271, "step": 6112, "task_loss": 0.7063331007957458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4671485424041748, "epoch": 5.17, "learning_rate": 2.4163144547759933e-05, "loss": 0.4875, "step": 6113, "task_loss": 0.6119168400764465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6656586527824402, "epoch": 5.17, "learning_rate": 2.415891800507185e-05, "loss": 0.5573, "step": 6114, "task_loss": 0.8138802647590637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49643921852111816, "epoch": 5.17, "learning_rate": 2.4154691462383773e-05, "loss": 0.5104, "step": 6115, "task_loss": 0.15977782011032104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40386420488357544, "epoch": 5.17, "learning_rate": 2.415046491969569e-05, "loss": 0.4368, "step": 6116, "task_loss": 0.763613224029541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5230156183242798, "epoch": 5.17, "learning_rate": 2.414623837700761e-05, "loss": 0.5556, "step": 6117, "task_loss": 0.2062350958585739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3846873342990875, "epoch": 5.17, "learning_rate": 2.414201183431953e-05, "loss": 0.5138, "step": 6118, "task_loss": 0.8341571092605591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4112198054790497, "epoch": 5.17, "learning_rate": 2.4137785291631445e-05, "loss": 0.4862, "step": 6119, "task_loss": 0.30310359597206116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4547218978404999, "epoch": 5.17, "learning_rate": 2.4133558748943365e-05, "loss": 0.3853, "step": 6120, "task_loss": 0.25066572427749634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3382847011089325, "epoch": 5.17, "learning_rate": 2.4129332206255284e-05, "loss": 0.5548, "step": 6121, "task_loss": 0.8832724094390869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46316203474998474, "epoch": 5.17, "learning_rate": 2.41251056635672e-05, "loss": 0.6726, "step": 6122, "task_loss": 0.7226827144622803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8395370244979858, "epoch": 5.18, "learning_rate": 2.4120879120879124e-05, "loss": 0.5376, "step": 6123, "task_loss": 0.6016039848327637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4850802421569824, "epoch": 5.18, "learning_rate": 2.411665257819104e-05, "loss": 0.4582, "step": 6124, "task_loss": 0.4139668643474579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5096008777618408, "epoch": 5.18, "learning_rate": 2.411242603550296e-05, "loss": 0.4657, "step": 6125, "task_loss": 0.9118910431861877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8254024982452393, "epoch": 5.18, "learning_rate": 2.410819949281488e-05, "loss": 0.5861, "step": 6126, "task_loss": 0.8758968114852905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39744144678115845, "epoch": 5.18, "learning_rate": 2.4103972950126796e-05, "loss": 0.4624, "step": 6127, "task_loss": 0.12627103924751282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0045619010925293, "epoch": 5.18, "learning_rate": 2.4099746407438716e-05, "loss": 0.5819, "step": 6128, "task_loss": 0.5143957734107971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5418936014175415, "epoch": 5.18, "learning_rate": 2.4095519864750636e-05, "loss": 0.4532, "step": 6129, "task_loss": 1.229182243347168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47781550884246826, "epoch": 5.18, "learning_rate": 2.4091293322062555e-05, "loss": 0.381, "step": 6130, "task_loss": 0.8751018643379211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4547562599182129, "epoch": 5.18, "learning_rate": 2.408706677937447e-05, "loss": 0.4861, "step": 6131, "task_loss": 1.2296758890151978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.516660213470459, "epoch": 5.18, "learning_rate": 2.408284023668639e-05, "loss": 0.5374, "step": 6132, "task_loss": 1.1468462944030762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44642311334609985, "epoch": 5.18, "learning_rate": 2.407861369399831e-05, "loss": 0.4429, "step": 6133, "task_loss": 0.3790968060493469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6537061929702759, "epoch": 5.19, "learning_rate": 2.407438715131023e-05, "loss": 0.5378, "step": 6134, "task_loss": 1.113348364830017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3634786903858185, "epoch": 5.19, "learning_rate": 2.4070160608622147e-05, "loss": 0.4635, "step": 6135, "task_loss": 0.47725826501846313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3838973641395569, "epoch": 5.19, "learning_rate": 2.4065934065934067e-05, "loss": 0.3957, "step": 6136, "task_loss": 1.3570797443389893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4662180244922638, "epoch": 5.19, "learning_rate": 2.4061707523245987e-05, "loss": 0.3766, "step": 6137, "task_loss": 1.1613508462905884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48386332392692566, "epoch": 5.19, "learning_rate": 2.4057480980557906e-05, "loss": 0.4792, "step": 6138, "task_loss": 0.8293060660362244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27913257479667664, "epoch": 5.19, "learning_rate": 2.4053254437869823e-05, "loss": 0.3979, "step": 6139, "task_loss": 0.08585330098867416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5151056051254272, "epoch": 5.19, "learning_rate": 2.4049027895181742e-05, "loss": 0.3968, "step": 6140, "task_loss": 0.1384936422109604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4784975051879883, "epoch": 5.19, "learning_rate": 2.4044801352493662e-05, "loss": 0.4819, "step": 6141, "task_loss": 0.8413895964622498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28375178575515747, "epoch": 5.19, "learning_rate": 2.404057480980558e-05, "loss": 0.4497, "step": 6142, "task_loss": 0.18109892308712006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.270940899848938, "epoch": 5.19, "learning_rate": 2.40363482671175e-05, "loss": 0.502, "step": 6143, "task_loss": 0.07345021516084671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3857145309448242, "epoch": 5.19, "learning_rate": 2.4032121724429418e-05, "loss": 0.433, "step": 6144, "task_loss": 0.6595595479011536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38571372628211975, "epoch": 5.19, "learning_rate": 2.4027895181741338e-05, "loss": 0.5561, "step": 6145, "task_loss": 0.6467337608337402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3356383442878723, "epoch": 5.2, "learning_rate": 2.4023668639053258e-05, "loss": 0.3876, "step": 6146, "task_loss": 0.5467643141746521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33924928307533264, "epoch": 5.2, "learning_rate": 2.4019442096365174e-05, "loss": 0.4639, "step": 6147, "task_loss": 0.31140634417533875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4599660038948059, "epoch": 5.2, "learning_rate": 2.4015215553677094e-05, "loss": 0.4027, "step": 6148, "task_loss": 0.6616432666778564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39545682072639465, "epoch": 5.2, "learning_rate": 2.4010989010989013e-05, "loss": 0.4071, "step": 6149, "task_loss": 0.6132952570915222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3314233422279358, "epoch": 5.2, "learning_rate": 2.400676246830093e-05, "loss": 0.456, "step": 6150, "task_loss": 0.6794623732566833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42838236689567566, "epoch": 5.2, "learning_rate": 2.400253592561285e-05, "loss": 0.4387, "step": 6151, "task_loss": 0.494686484336853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30689436197280884, "epoch": 5.2, "learning_rate": 2.399830938292477e-05, "loss": 0.4499, "step": 6152, "task_loss": 0.21913358569145203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5312821865081787, "epoch": 5.2, "learning_rate": 2.3994082840236686e-05, "loss": 0.5279, "step": 6153, "task_loss": 1.0860376358032227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3166561722755432, "epoch": 5.2, "learning_rate": 2.398985629754861e-05, "loss": 0.479, "step": 6154, "task_loss": 0.43716490268707275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7473480701446533, "epoch": 5.2, "learning_rate": 2.3985629754860525e-05, "loss": 0.4912, "step": 6155, "task_loss": 0.6622763872146606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4961322844028473, "epoch": 5.2, "learning_rate": 2.398140321217244e-05, "loss": 0.5546, "step": 6156, "task_loss": 1.2248502969741821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5788621306419373, "epoch": 5.2, "learning_rate": 2.3977176669484364e-05, "loss": 0.5357, "step": 6157, "task_loss": 1.2150005102157593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4575561583042145, "epoch": 5.21, "learning_rate": 2.397295012679628e-05, "loss": 0.4225, "step": 6158, "task_loss": 1.0713618993759155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42552024126052856, "epoch": 5.21, "learning_rate": 2.39687235841082e-05, "loss": 0.4823, "step": 6159, "task_loss": 0.9159592986106873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5420217514038086, "epoch": 5.21, "learning_rate": 2.396449704142012e-05, "loss": 0.4762, "step": 6160, "task_loss": 0.15733765065670013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5595120787620544, "epoch": 5.21, "learning_rate": 2.3960270498732037e-05, "loss": 0.5666, "step": 6161, "task_loss": 0.52796870470047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5384198427200317, "epoch": 5.21, "learning_rate": 2.395604395604396e-05, "loss": 0.5238, "step": 6162, "task_loss": 1.5352758169174194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41288265585899353, "epoch": 5.21, "learning_rate": 2.3951817413355876e-05, "loss": 0.5318, "step": 6163, "task_loss": 1.0586518049240112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30877485871315, "epoch": 5.21, "learning_rate": 2.3947590870667792e-05, "loss": 0.401, "step": 6164, "task_loss": 0.4690262973308563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31064373254776, "epoch": 5.21, "learning_rate": 2.3943364327979716e-05, "loss": 0.378, "step": 6165, "task_loss": 0.7250860333442688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4359944760799408, "epoch": 5.21, "learning_rate": 2.3939137785291632e-05, "loss": 0.3895, "step": 6166, "task_loss": 0.3688778877258301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5869047045707703, "epoch": 5.21, "learning_rate": 2.3934911242603552e-05, "loss": 0.7003, "step": 6167, "task_loss": 0.5083137154579163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45609742403030396, "epoch": 5.21, "learning_rate": 2.393068469991547e-05, "loss": 0.5405, "step": 6168, "task_loss": 0.7061580419540405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27940666675567627, "epoch": 5.21, "learning_rate": 2.3926458157227388e-05, "loss": 0.4363, "step": 6169, "task_loss": 0.7165878415107727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4445539712905884, "epoch": 5.22, "learning_rate": 2.3922231614539308e-05, "loss": 0.483, "step": 6170, "task_loss": 0.7307248115539551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48014184832572937, "epoch": 5.22, "learning_rate": 2.3918005071851227e-05, "loss": 0.415, "step": 6171, "task_loss": 1.2688418626785278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4180171489715576, "epoch": 5.22, "learning_rate": 2.3913778529163144e-05, "loss": 0.4173, "step": 6172, "task_loss": 0.5772649645805359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38885343074798584, "epoch": 5.22, "learning_rate": 2.3909551986475063e-05, "loss": 0.5125, "step": 6173, "task_loss": 0.37760668992996216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35532480478286743, "epoch": 5.22, "learning_rate": 2.3905325443786983e-05, "loss": 0.4287, "step": 6174, "task_loss": 0.7236931324005127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34226906299591064, "epoch": 5.22, "learning_rate": 2.3901098901098903e-05, "loss": 0.5838, "step": 6175, "task_loss": 1.1071233749389648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20892786979675293, "epoch": 5.22, "learning_rate": 2.3896872358410823e-05, "loss": 0.4738, "step": 6176, "task_loss": 1.0729855298995972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2818046808242798, "epoch": 5.22, "learning_rate": 2.389264581572274e-05, "loss": 0.4685, "step": 6177, "task_loss": 0.6242684125900269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5857528448104858, "epoch": 5.22, "learning_rate": 2.388841927303466e-05, "loss": 0.3985, "step": 6178, "task_loss": 0.8326637744903564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6040318608283997, "epoch": 5.22, "learning_rate": 2.388419273034658e-05, "loss": 0.5979, "step": 6179, "task_loss": 0.8495544195175171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5291112065315247, "epoch": 5.22, "learning_rate": 2.3879966187658495e-05, "loss": 0.4013, "step": 6180, "task_loss": 0.14762166142463684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29011183977127075, "epoch": 5.22, "learning_rate": 2.3875739644970414e-05, "loss": 0.4998, "step": 6181, "task_loss": 0.8502899408340454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.54482102394104, "epoch": 5.23, "learning_rate": 2.3871513102282334e-05, "loss": 0.4466, "step": 6182, "task_loss": 0.47218915820121765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4998707175254822, "epoch": 5.23, "learning_rate": 2.3867286559594254e-05, "loss": 0.484, "step": 6183, "task_loss": 1.1995604038238525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5405217409133911, "epoch": 5.23, "learning_rate": 2.386306001690617e-05, "loss": 0.5226, "step": 6184, "task_loss": 0.6516919732093811 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40171027183532715, "epoch": 5.23, "learning_rate": 2.385883347421809e-05, "loss": 0.5009, "step": 6185, "task_loss": 1.1580421924591064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3655277490615845, "epoch": 5.23, "learning_rate": 2.385460693153001e-05, "loss": 0.4911, "step": 6186, "task_loss": 0.7408225536346436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38081884384155273, "epoch": 5.23, "learning_rate": 2.385038038884193e-05, "loss": 0.4583, "step": 6187, "task_loss": 0.7897555232048035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35455620288848877, "epoch": 5.23, "learning_rate": 2.384615384615385e-05, "loss": 0.5004, "step": 6188, "task_loss": 0.08219151943922043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24608838558197021, "epoch": 5.23, "learning_rate": 2.3841927303465766e-05, "loss": 0.432, "step": 6189, "task_loss": 0.10677923262119293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38481777906417847, "epoch": 5.23, "learning_rate": 2.3837700760777685e-05, "loss": 0.4889, "step": 6190, "task_loss": 0.14600814878940582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27503272891044617, "epoch": 5.23, "learning_rate": 2.3833474218089605e-05, "loss": 0.3807, "step": 6191, "task_loss": 0.5679135322570801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6669226288795471, "epoch": 5.23, "learning_rate": 2.382924767540152e-05, "loss": 0.688, "step": 6192, "task_loss": 0.7145673036575317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31415343284606934, "epoch": 5.23, "learning_rate": 2.382502113271344e-05, "loss": 0.3753, "step": 6193, "task_loss": 0.5760126709938049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44989722967147827, "epoch": 5.24, "learning_rate": 2.382079459002536e-05, "loss": 0.6227, "step": 6194, "task_loss": 0.6788726449012756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28375405073165894, "epoch": 5.24, "learning_rate": 2.3816568047337277e-05, "loss": 0.4985, "step": 6195, "task_loss": 0.17066077888011932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36997678875923157, "epoch": 5.24, "learning_rate": 2.38123415046492e-05, "loss": 0.3768, "step": 6196, "task_loss": 0.5197086930274963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3998801112174988, "epoch": 5.24, "learning_rate": 2.3808114961961117e-05, "loss": 0.4801, "step": 6197, "task_loss": 0.848540186882019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33236798644065857, "epoch": 5.24, "learning_rate": 2.3803888419273036e-05, "loss": 0.3891, "step": 6198, "task_loss": 0.03807831183075905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38963958621025085, "epoch": 5.24, "learning_rate": 2.3799661876584956e-05, "loss": 0.5063, "step": 6199, "task_loss": 0.21854987740516663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40772736072540283, "epoch": 5.24, "learning_rate": 2.3795435333896873e-05, "loss": 0.5142, "step": 6200, "task_loss": 0.4438783824443817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3230104446411133, "epoch": 5.24, "learning_rate": 2.3791208791208792e-05, "loss": 0.4439, "step": 6201, "task_loss": 0.040172476321458817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42724302411079407, "epoch": 5.24, "learning_rate": 2.3786982248520712e-05, "loss": 0.5624, "step": 6202, "task_loss": 0.5504933595657349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22374588251113892, "epoch": 5.24, "learning_rate": 2.378275570583263e-05, "loss": 0.4967, "step": 6203, "task_loss": 0.1483563482761383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2604157030582428, "epoch": 5.24, "learning_rate": 2.377852916314455e-05, "loss": 0.4024, "step": 6204, "task_loss": 0.2101244032382965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3066834807395935, "epoch": 5.24, "learning_rate": 2.3774302620456468e-05, "loss": 0.5404, "step": 6205, "task_loss": 0.6719635725021362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5508102178573608, "epoch": 5.25, "learning_rate": 2.3770076077768384e-05, "loss": 0.4349, "step": 6206, "task_loss": 0.819719135761261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4748684763908386, "epoch": 5.25, "learning_rate": 2.3765849535080307e-05, "loss": 0.5037, "step": 6207, "task_loss": 1.0238730907440186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29316431283950806, "epoch": 5.25, "learning_rate": 2.3761622992392224e-05, "loss": 0.4317, "step": 6208, "task_loss": 0.1587817668914795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.459617555141449, "epoch": 5.25, "learning_rate": 2.3757396449704143e-05, "loss": 0.5014, "step": 6209, "task_loss": 0.37662234902381897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6899843811988831, "epoch": 5.25, "learning_rate": 2.3753169907016063e-05, "loss": 0.4618, "step": 6210, "task_loss": 0.5653902888298035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4633323848247528, "epoch": 5.25, "learning_rate": 2.374894336432798e-05, "loss": 0.5622, "step": 6211, "task_loss": 0.5838003158569336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9908839464187622, "epoch": 5.25, "learning_rate": 2.37447168216399e-05, "loss": 0.5773, "step": 6212, "task_loss": 0.7032069563865662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40244072675704956, "epoch": 5.25, "learning_rate": 2.374049027895182e-05, "loss": 0.4676, "step": 6213, "task_loss": 0.23335306346416473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6343683004379272, "epoch": 5.25, "learning_rate": 2.3736263736263735e-05, "loss": 0.4552, "step": 6214, "task_loss": 1.1770395040512085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49519848823547363, "epoch": 5.25, "learning_rate": 2.373203719357566e-05, "loss": 0.5787, "step": 6215, "task_loss": 1.8557019233703613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0788205862045288, "epoch": 5.25, "learning_rate": 2.3727810650887575e-05, "loss": 0.6002, "step": 6216, "task_loss": 1.432248592376709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3671010732650757, "epoch": 5.26, "learning_rate": 2.3723584108199495e-05, "loss": 0.3877, "step": 6217, "task_loss": 0.8160368204116821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14648397266864777, "epoch": 5.26, "learning_rate": 2.3719357565511414e-05, "loss": 0.4201, "step": 6218, "task_loss": 0.5890552997589111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3210896849632263, "epoch": 5.26, "learning_rate": 2.371513102282333e-05, "loss": 0.3525, "step": 6219, "task_loss": 0.11566920578479767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48766273260116577, "epoch": 5.26, "learning_rate": 2.371090448013525e-05, "loss": 0.5473, "step": 6220, "task_loss": 1.1073604822158813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.589460015296936, "epoch": 5.26, "learning_rate": 2.370667793744717e-05, "loss": 0.4836, "step": 6221, "task_loss": 0.1785520315170288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27041614055633545, "epoch": 5.26, "learning_rate": 2.3702451394759087e-05, "loss": 0.4561, "step": 6222, "task_loss": 0.2553998827934265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33399829268455505, "epoch": 5.26, "learning_rate": 2.3698224852071006e-05, "loss": 0.421, "step": 6223, "task_loss": 0.15752455592155457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31323787569999695, "epoch": 5.26, "learning_rate": 2.3693998309382926e-05, "loss": 0.4284, "step": 6224, "task_loss": 0.21446283161640167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5507957935333252, "epoch": 5.26, "learning_rate": 2.3689771766694846e-05, "loss": 0.5554, "step": 6225, "task_loss": 1.9506876468658447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44343873858451843, "epoch": 5.26, "learning_rate": 2.3685545224006765e-05, "loss": 0.3812, "step": 6226, "task_loss": 0.5801596641540527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3441895842552185, "epoch": 5.26, "learning_rate": 2.3681318681318682e-05, "loss": 0.4325, "step": 6227, "task_loss": 0.7725982069969177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7963119149208069, "epoch": 5.26, "learning_rate": 2.36770921386306e-05, "loss": 0.6446, "step": 6228, "task_loss": 1.8608191013336182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.575245201587677, "epoch": 5.27, "learning_rate": 2.367286559594252e-05, "loss": 0.4851, "step": 6229, "task_loss": 0.590059220790863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.537763774394989, "epoch": 5.27, "learning_rate": 2.3668639053254438e-05, "loss": 0.5185, "step": 6230, "task_loss": 0.6747527718544006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25006818771362305, "epoch": 5.27, "learning_rate": 2.3664412510566357e-05, "loss": 0.4855, "step": 6231, "task_loss": 0.3574564456939697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.12133331596851349, "epoch": 5.27, "learning_rate": 2.3660185967878277e-05, "loss": 0.4084, "step": 6232, "task_loss": 0.015367194078862667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5480034351348877, "epoch": 5.27, "learning_rate": 2.3655959425190197e-05, "loss": 0.527, "step": 6233, "task_loss": 1.6213537454605103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44211721420288086, "epoch": 5.27, "learning_rate": 2.3651732882502113e-05, "loss": 0.5621, "step": 6234, "task_loss": 0.9960834980010986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34656888246536255, "epoch": 5.27, "learning_rate": 2.3647506339814033e-05, "loss": 0.4312, "step": 6235, "task_loss": 0.2736213505268097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5731849670410156, "epoch": 5.27, "learning_rate": 2.3643279797125953e-05, "loss": 0.4383, "step": 6236, "task_loss": 0.29941362142562866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4600813388824463, "epoch": 5.27, "learning_rate": 2.363905325443787e-05, "loss": 0.4341, "step": 6237, "task_loss": 0.36316174268722534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6788662075996399, "epoch": 5.27, "learning_rate": 2.363482671174979e-05, "loss": 0.5862, "step": 6238, "task_loss": 0.7667924165725708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6313486099243164, "epoch": 5.27, "learning_rate": 2.363060016906171e-05, "loss": 0.3478, "step": 6239, "task_loss": 0.39742758870124817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7032457590103149, "epoch": 5.27, "learning_rate": 2.3626373626373628e-05, "loss": 0.5508, "step": 6240, "task_loss": 0.26318061351776123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6162355542182922, "epoch": 5.28, "learning_rate": 2.3622147083685548e-05, "loss": 0.4791, "step": 6241, "task_loss": 1.9621481895446777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5780901908874512, "epoch": 5.28, "learning_rate": 2.3617920540997464e-05, "loss": 0.4403, "step": 6242, "task_loss": 0.7662016153335571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4235873818397522, "epoch": 5.28, "learning_rate": 2.3613693998309384e-05, "loss": 0.4591, "step": 6243, "task_loss": 1.1155574321746826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2787354588508606, "epoch": 5.28, "learning_rate": 2.3609467455621304e-05, "loss": 0.3899, "step": 6244, "task_loss": 0.6909473538398743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44485950469970703, "epoch": 5.28, "learning_rate": 2.360524091293322e-05, "loss": 0.5258, "step": 6245, "task_loss": 0.7350509166717529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3484078645706177, "epoch": 5.28, "learning_rate": 2.3601014370245143e-05, "loss": 0.3873, "step": 6246, "task_loss": 0.49561452865600586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5656984448432922, "epoch": 5.28, "learning_rate": 2.359678782755706e-05, "loss": 0.5469, "step": 6247, "task_loss": 0.6476038098335266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29890021681785583, "epoch": 5.28, "learning_rate": 2.3592561284868976e-05, "loss": 0.3794, "step": 6248, "task_loss": 0.34847554564476013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5308851003646851, "epoch": 5.28, "learning_rate": 2.35883347421809e-05, "loss": 0.4009, "step": 6249, "task_loss": 1.101190209388733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6128857135772705, "epoch": 5.28, "learning_rate": 2.3584108199492815e-05, "loss": 0.4499, "step": 6250, "task_loss": 1.1265512704849243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16186052560806274, "epoch": 5.28, "learning_rate": 2.3579881656804735e-05, "loss": 0.4538, "step": 6251, "task_loss": 1.03965425491333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4434671401977539, "epoch": 5.28, "learning_rate": 2.3575655114116655e-05, "loss": 0.47, "step": 6252, "task_loss": 0.6152116656303406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28809115290641785, "epoch": 5.29, "learning_rate": 2.357142857142857e-05, "loss": 0.4019, "step": 6253, "task_loss": 0.48627209663391113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3234362006187439, "epoch": 5.29, "learning_rate": 2.356720202874049e-05, "loss": 0.46, "step": 6254, "task_loss": 0.4574730694293976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5012171268463135, "epoch": 5.29, "learning_rate": 2.356297548605241e-05, "loss": 0.4611, "step": 6255, "task_loss": 0.7161834836006165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6277874708175659, "epoch": 5.29, "learning_rate": 2.3558748943364327e-05, "loss": 0.5396, "step": 6256, "task_loss": 0.9952029585838318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5955291390419006, "epoch": 5.29, "learning_rate": 2.355452240067625e-05, "loss": 0.4362, "step": 6257, "task_loss": 0.4639523923397064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4481378495693207, "epoch": 5.29, "learning_rate": 2.3550295857988167e-05, "loss": 0.5522, "step": 6258, "task_loss": 0.4169240891933441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4396369457244873, "epoch": 5.29, "learning_rate": 2.3546069315300083e-05, "loss": 0.5029, "step": 6259, "task_loss": 0.200527161359787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34670278429985046, "epoch": 5.29, "learning_rate": 2.3541842772612006e-05, "loss": 0.4039, "step": 6260, "task_loss": 1.1637672185897827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39813128113746643, "epoch": 5.29, "learning_rate": 2.3537616229923922e-05, "loss": 0.4282, "step": 6261, "task_loss": 0.27429771423339844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6128538846969604, "epoch": 5.29, "learning_rate": 2.3533389687235842e-05, "loss": 0.4492, "step": 6262, "task_loss": 0.508765697479248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48425254225730896, "epoch": 5.29, "learning_rate": 2.3529163144547762e-05, "loss": 0.5545, "step": 6263, "task_loss": 0.07327888906002045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4513271152973175, "epoch": 5.29, "learning_rate": 2.3524936601859678e-05, "loss": 0.4591, "step": 6264, "task_loss": 1.0417494773864746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5475974082946777, "epoch": 5.3, "learning_rate": 2.3520710059171598e-05, "loss": 0.5554, "step": 6265, "task_loss": 0.5591528415679932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3692634701728821, "epoch": 5.3, "learning_rate": 2.3516483516483518e-05, "loss": 0.4618, "step": 6266, "task_loss": 0.7540041208267212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40020036697387695, "epoch": 5.3, "learning_rate": 2.3512256973795434e-05, "loss": 0.4721, "step": 6267, "task_loss": 0.2871745824813843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34949609637260437, "epoch": 5.3, "learning_rate": 2.3508030431107357e-05, "loss": 0.4112, "step": 6268, "task_loss": 0.6027837991714478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49232056736946106, "epoch": 5.3, "learning_rate": 2.3503803888419274e-05, "loss": 0.3764, "step": 6269, "task_loss": 0.4374786615371704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33673879504203796, "epoch": 5.3, "learning_rate": 2.3499577345731193e-05, "loss": 0.6507, "step": 6270, "task_loss": 0.06498978286981583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3401879072189331, "epoch": 5.3, "learning_rate": 2.3495350803043113e-05, "loss": 0.4364, "step": 6271, "task_loss": 0.2961442470550537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6984174251556396, "epoch": 5.3, "learning_rate": 2.349112426035503e-05, "loss": 0.6421, "step": 6272, "task_loss": 1.2203418016433716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4302659034729004, "epoch": 5.3, "learning_rate": 2.348689771766695e-05, "loss": 0.5239, "step": 6273, "task_loss": 0.6233135461807251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15413682162761688, "epoch": 5.3, "learning_rate": 2.348267117497887e-05, "loss": 0.333, "step": 6274, "task_loss": 0.1554541438817978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32594770193099976, "epoch": 5.3, "learning_rate": 2.347844463229079e-05, "loss": 0.3904, "step": 6275, "task_loss": 0.08425401151180267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3967881202697754, "epoch": 5.3, "learning_rate": 2.3474218089602705e-05, "loss": 0.4763, "step": 6276, "task_loss": 0.8253606557846069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6584041714668274, "epoch": 5.31, "learning_rate": 2.3469991546914625e-05, "loss": 0.4435, "step": 6277, "task_loss": 1.5819460153579712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5217419862747192, "epoch": 5.31, "learning_rate": 2.3465765004226544e-05, "loss": 0.5061, "step": 6278, "task_loss": 0.2556835114955902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4729047417640686, "epoch": 5.31, "learning_rate": 2.3461538461538464e-05, "loss": 0.4692, "step": 6279, "task_loss": 0.2707321047782898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9134374260902405, "epoch": 5.31, "learning_rate": 2.345731191885038e-05, "loss": 0.6427, "step": 6280, "task_loss": 1.277496337890625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.263666033744812, "epoch": 5.31, "learning_rate": 2.34530853761623e-05, "loss": 0.3502, "step": 6281, "task_loss": 0.5824320912361145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4495238959789276, "epoch": 5.31, "learning_rate": 2.344885883347422e-05, "loss": 0.4482, "step": 6282, "task_loss": 0.8620867729187012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2633923292160034, "epoch": 5.31, "learning_rate": 2.344463229078614e-05, "loss": 0.4124, "step": 6283, "task_loss": 0.6501432657241821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.281352162361145, "epoch": 5.31, "learning_rate": 2.3440405748098056e-05, "loss": 0.5196, "step": 6284, "task_loss": 0.9025052785873413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4177960157394409, "epoch": 5.31, "learning_rate": 2.3436179205409976e-05, "loss": 0.4289, "step": 6285, "task_loss": 0.640766441822052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6713600158691406, "epoch": 5.31, "learning_rate": 2.3431952662721896e-05, "loss": 0.5789, "step": 6286, "task_loss": 0.4753790497779846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38802266120910645, "epoch": 5.31, "learning_rate": 2.3427726120033812e-05, "loss": 0.4026, "step": 6287, "task_loss": 0.218027725815773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4825308620929718, "epoch": 5.32, "learning_rate": 2.342349957734573e-05, "loss": 0.4285, "step": 6288, "task_loss": 0.7239080667495728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25949832797050476, "epoch": 5.32, "learning_rate": 2.341927303465765e-05, "loss": 0.3898, "step": 6289, "task_loss": 0.15181967616081238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5462753176689148, "epoch": 5.32, "learning_rate": 2.341504649196957e-05, "loss": 0.5793, "step": 6290, "task_loss": 0.3417903482913971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4920628070831299, "epoch": 5.32, "learning_rate": 2.341081994928149e-05, "loss": 0.471, "step": 6291, "task_loss": 0.855144739151001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31098222732543945, "epoch": 5.32, "learning_rate": 2.3406593406593407e-05, "loss": 0.4058, "step": 6292, "task_loss": 0.5844261050224304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8183332085609436, "epoch": 5.32, "learning_rate": 2.3402366863905327e-05, "loss": 0.4288, "step": 6293, "task_loss": 0.9094383716583252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6002986431121826, "epoch": 5.32, "learning_rate": 2.3398140321217247e-05, "loss": 0.5538, "step": 6294, "task_loss": 0.46236011385917664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7307037115097046, "epoch": 5.32, "learning_rate": 2.3393913778529163e-05, "loss": 0.5954, "step": 6295, "task_loss": 0.9432345628738403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5027535557746887, "epoch": 5.32, "learning_rate": 2.3389687235841083e-05, "loss": 0.4874, "step": 6296, "task_loss": 0.6954266428947449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5634467601776123, "epoch": 5.32, "learning_rate": 2.3385460693153003e-05, "loss": 0.5371, "step": 6297, "task_loss": 1.1679253578186035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24532440304756165, "epoch": 5.32, "learning_rate": 2.338123415046492e-05, "loss": 0.3951, "step": 6298, "task_loss": 0.9199677109718323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9492353200912476, "epoch": 5.32, "learning_rate": 2.3377007607776842e-05, "loss": 0.5881, "step": 6299, "task_loss": 1.0791666507720947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.473374605178833, "epoch": 5.33, "learning_rate": 2.337278106508876e-05, "loss": 0.4744, "step": 6300, "task_loss": 0.5616335868835449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8260717391967773, "epoch": 5.33, "learning_rate": 2.3368554522400675e-05, "loss": 0.6269, "step": 6301, "task_loss": 1.231544852256775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4669424593448639, "epoch": 5.33, "learning_rate": 2.3364327979712598e-05, "loss": 0.5183, "step": 6302, "task_loss": 0.44171619415283203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4790842831134796, "epoch": 5.33, "learning_rate": 2.3360101437024514e-05, "loss": 0.4442, "step": 6303, "task_loss": 1.087006688117981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.267011433839798, "epoch": 5.33, "learning_rate": 2.3355874894336434e-05, "loss": 0.4156, "step": 6304, "task_loss": 1.3671830892562866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8638502359390259, "epoch": 5.33, "learning_rate": 2.3351648351648354e-05, "loss": 0.6019, "step": 6305, "task_loss": 0.4426146149635315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5196127891540527, "epoch": 5.33, "learning_rate": 2.334742180896027e-05, "loss": 0.58, "step": 6306, "task_loss": 0.6124235391616821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26033371686935425, "epoch": 5.33, "learning_rate": 2.334319526627219e-05, "loss": 0.539, "step": 6307, "task_loss": 0.3434199392795563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5375725626945496, "epoch": 5.33, "learning_rate": 2.333896872358411e-05, "loss": 0.5522, "step": 6308, "task_loss": 0.6082590222358704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5345404148101807, "epoch": 5.33, "learning_rate": 2.3334742180896026e-05, "loss": 0.4097, "step": 6309, "task_loss": 1.2123430967330933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25966495275497437, "epoch": 5.33, "learning_rate": 2.333051563820795e-05, "loss": 0.3246, "step": 6310, "task_loss": 0.02176729030907154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5519406795501709, "epoch": 5.33, "learning_rate": 2.3326289095519865e-05, "loss": 0.5274, "step": 6311, "task_loss": 0.40845367312431335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.570057213306427, "epoch": 5.34, "learning_rate": 2.3322062552831785e-05, "loss": 0.4373, "step": 6312, "task_loss": 0.3954927921295166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7295188903808594, "epoch": 5.34, "learning_rate": 2.3317836010143705e-05, "loss": 0.4618, "step": 6313, "task_loss": 0.880217432975769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5778757929801941, "epoch": 5.34, "learning_rate": 2.331360946745562e-05, "loss": 0.4438, "step": 6314, "task_loss": 0.27902382612228394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26342594623565674, "epoch": 5.34, "learning_rate": 2.330938292476754e-05, "loss": 0.4656, "step": 6315, "task_loss": 0.7434739470481873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35087716579437256, "epoch": 5.34, "learning_rate": 2.330515638207946e-05, "loss": 0.3003, "step": 6316, "task_loss": 0.21923521161079407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4444340467453003, "epoch": 5.34, "learning_rate": 2.3300929839391377e-05, "loss": 0.4964, "step": 6317, "task_loss": 0.7905334234237671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26901328563690186, "epoch": 5.34, "learning_rate": 2.3296703296703297e-05, "loss": 0.3457, "step": 6318, "task_loss": 0.08010376989841461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0238771438598633, "epoch": 5.34, "learning_rate": 2.3292476754015216e-05, "loss": 0.6925, "step": 6319, "task_loss": 1.4084781408309937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7447539567947388, "epoch": 5.34, "learning_rate": 2.3288250211327136e-05, "loss": 0.4913, "step": 6320, "task_loss": 0.694594144821167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3836894631385803, "epoch": 5.34, "learning_rate": 2.3284023668639056e-05, "loss": 0.4298, "step": 6321, "task_loss": 1.0191599130630493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6567772030830383, "epoch": 5.34, "learning_rate": 2.3279797125950972e-05, "loss": 0.541, "step": 6322, "task_loss": 1.4293559789657593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22209644317626953, "epoch": 5.34, "learning_rate": 2.3275570583262892e-05, "loss": 0.362, "step": 6323, "task_loss": 0.21419844031333923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6108503341674805, "epoch": 5.35, "learning_rate": 2.3271344040574812e-05, "loss": 0.4399, "step": 6324, "task_loss": 0.47167152166366577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6511044502258301, "epoch": 5.35, "learning_rate": 2.3267117497886728e-05, "loss": 0.503, "step": 6325, "task_loss": 0.6655309796333313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5060898661613464, "epoch": 5.35, "learning_rate": 2.3262890955198648e-05, "loss": 0.5004, "step": 6326, "task_loss": 0.23334598541259766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6852645874023438, "epoch": 5.35, "learning_rate": 2.3258664412510568e-05, "loss": 0.5676, "step": 6327, "task_loss": 0.6163545846939087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.705136775970459, "epoch": 5.35, "learning_rate": 2.3254437869822487e-05, "loss": 0.7197, "step": 6328, "task_loss": 0.3037494421005249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.907212495803833, "epoch": 5.35, "learning_rate": 2.3250211327134404e-05, "loss": 0.6543, "step": 6329, "task_loss": 0.3265439569950104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7458125948905945, "epoch": 5.35, "learning_rate": 2.3245984784446323e-05, "loss": 0.4454, "step": 6330, "task_loss": 0.8707532286643982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4769487977027893, "epoch": 5.35, "learning_rate": 2.3241758241758243e-05, "loss": 0.5082, "step": 6331, "task_loss": 0.7781730890274048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4995899200439453, "epoch": 5.35, "learning_rate": 2.3237531699070163e-05, "loss": 0.4806, "step": 6332, "task_loss": 0.6673713326454163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.275932252407074, "epoch": 5.35, "learning_rate": 2.3233305156382083e-05, "loss": 0.3908, "step": 6333, "task_loss": 0.34814953804016113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37716493010520935, "epoch": 5.35, "learning_rate": 2.3229078613694e-05, "loss": 0.4074, "step": 6334, "task_loss": 0.3317495584487915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5032260417938232, "epoch": 5.35, "learning_rate": 2.322485207100592e-05, "loss": 0.4644, "step": 6335, "task_loss": 0.4129529595375061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39171987771987915, "epoch": 5.36, "learning_rate": 2.322062552831784e-05, "loss": 0.3735, "step": 6336, "task_loss": 0.2407274693250656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5966951847076416, "epoch": 5.36, "learning_rate": 2.3216398985629755e-05, "loss": 0.6551, "step": 6337, "task_loss": 1.4332427978515625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5214473605155945, "epoch": 5.36, "learning_rate": 2.3212172442941675e-05, "loss": 0.5769, "step": 6338, "task_loss": 0.2481902688741684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6677607297897339, "epoch": 5.36, "learning_rate": 2.3207945900253594e-05, "loss": 0.5079, "step": 6339, "task_loss": 0.8833622336387634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3551899790763855, "epoch": 5.36, "learning_rate": 2.320371935756551e-05, "loss": 0.5046, "step": 6340, "task_loss": 0.6206568479537964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47297364473342896, "epoch": 5.36, "learning_rate": 2.3199492814877434e-05, "loss": 0.481, "step": 6341, "task_loss": 1.128438115119934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49855107069015503, "epoch": 5.36, "learning_rate": 2.319526627218935e-05, "loss": 0.7045, "step": 6342, "task_loss": 0.7372344136238098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4442156255245209, "epoch": 5.36, "learning_rate": 2.319103972950127e-05, "loss": 0.5127, "step": 6343, "task_loss": 0.5850629210472107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29978296160697937, "epoch": 5.36, "learning_rate": 2.318681318681319e-05, "loss": 0.5218, "step": 6344, "task_loss": 0.5273677706718445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28832143545150757, "epoch": 5.36, "learning_rate": 2.3182586644125106e-05, "loss": 0.4649, "step": 6345, "task_loss": 0.34064310789108276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36183393001556396, "epoch": 5.36, "learning_rate": 2.3178360101437026e-05, "loss": 0.3171, "step": 6346, "task_loss": 0.39563506841659546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4016374349594116, "epoch": 5.36, "learning_rate": 2.3174133558748945e-05, "loss": 0.4561, "step": 6347, "task_loss": 1.013319969177246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44493088126182556, "epoch": 5.37, "learning_rate": 2.3169907016060862e-05, "loss": 0.4621, "step": 6348, "task_loss": 0.6441196799278259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3102928698062897, "epoch": 5.37, "learning_rate": 2.3165680473372785e-05, "loss": 0.4571, "step": 6349, "task_loss": 0.43051812052726746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4973911643028259, "epoch": 5.37, "learning_rate": 2.31614539306847e-05, "loss": 0.5559, "step": 6350, "task_loss": 1.2376153469085693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4312673807144165, "epoch": 5.37, "learning_rate": 2.3157227387996618e-05, "loss": 0.3635, "step": 6351, "task_loss": 0.6045324206352234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.556331217288971, "epoch": 5.37, "learning_rate": 2.315300084530854e-05, "loss": 0.4133, "step": 6352, "task_loss": 0.2536722719669342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45933598279953003, "epoch": 5.37, "learning_rate": 2.3148774302620457e-05, "loss": 0.5702, "step": 6353, "task_loss": 1.744941234588623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2884848415851593, "epoch": 5.37, "learning_rate": 2.3144547759932377e-05, "loss": 0.5027, "step": 6354, "task_loss": 0.9973456859588623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5337940454483032, "epoch": 5.37, "learning_rate": 2.3140321217244297e-05, "loss": 0.599, "step": 6355, "task_loss": 0.6655141711235046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5315665006637573, "epoch": 5.37, "learning_rate": 2.3136094674556213e-05, "loss": 0.4786, "step": 6356, "task_loss": 0.5373191237449646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40978026390075684, "epoch": 5.37, "learning_rate": 2.3131868131868133e-05, "loss": 0.4364, "step": 6357, "task_loss": 0.1383046805858612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3069595992565155, "epoch": 5.37, "learning_rate": 2.3127641589180052e-05, "loss": 0.4057, "step": 6358, "task_loss": 0.37925341725349426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4968445897102356, "epoch": 5.38, "learning_rate": 2.312341504649197e-05, "loss": 0.5779, "step": 6359, "task_loss": 0.6606655716896057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7478101849555969, "epoch": 5.38, "learning_rate": 2.3119188503803892e-05, "loss": 0.5846, "step": 6360, "task_loss": 0.9579147100448608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7198728919029236, "epoch": 5.38, "learning_rate": 2.3114961961115808e-05, "loss": 0.5251, "step": 6361, "task_loss": 0.5034919381141663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41016674041748047, "epoch": 5.38, "learning_rate": 2.3110735418427728e-05, "loss": 0.4581, "step": 6362, "task_loss": 0.6901698708534241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4036819040775299, "epoch": 5.38, "learning_rate": 2.3106508875739648e-05, "loss": 0.5376, "step": 6363, "task_loss": 0.5144193172454834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4146765470504761, "epoch": 5.38, "learning_rate": 2.3102282333051564e-05, "loss": 0.4492, "step": 6364, "task_loss": 0.14593659341335297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3329591155052185, "epoch": 5.38, "learning_rate": 2.3098055790363484e-05, "loss": 0.5064, "step": 6365, "task_loss": 0.1634732484817505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38214319944381714, "epoch": 5.38, "learning_rate": 2.3093829247675404e-05, "loss": 0.4409, "step": 6366, "task_loss": 0.2822558581829071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43107059597969055, "epoch": 5.38, "learning_rate": 2.308960270498732e-05, "loss": 0.5129, "step": 6367, "task_loss": 0.43987447023391724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4098028838634491, "epoch": 5.38, "learning_rate": 2.308537616229924e-05, "loss": 0.5469, "step": 6368, "task_loss": 0.71338951587677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4446815848350525, "epoch": 5.38, "learning_rate": 2.308114961961116e-05, "loss": 0.589, "step": 6369, "task_loss": 0.22819450497627258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36666053533554077, "epoch": 5.38, "learning_rate": 2.307692307692308e-05, "loss": 0.5098, "step": 6370, "task_loss": 0.5681432485580444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4629097580909729, "epoch": 5.39, "learning_rate": 2.3072696534234995e-05, "loss": 0.5048, "step": 6371, "task_loss": 0.5546885132789612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36749204993247986, "epoch": 5.39, "learning_rate": 2.3068469991546915e-05, "loss": 0.4672, "step": 6372, "task_loss": 0.33122384548187256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4888591170310974, "epoch": 5.39, "learning_rate": 2.3064243448858835e-05, "loss": 0.4286, "step": 6373, "task_loss": 0.506497323513031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5156115293502808, "epoch": 5.39, "learning_rate": 2.3060016906170755e-05, "loss": 0.5264, "step": 6374, "task_loss": 0.8370395302772522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5073370933532715, "epoch": 5.39, "learning_rate": 2.305579036348267e-05, "loss": 0.4431, "step": 6375, "task_loss": 0.4581719934940338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2702508866786957, "epoch": 5.39, "learning_rate": 2.305156382079459e-05, "loss": 0.3415, "step": 6376, "task_loss": 0.702288806438446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.64216548204422, "epoch": 5.39, "learning_rate": 2.304733727810651e-05, "loss": 0.3822, "step": 6377, "task_loss": 0.7829493284225464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5027613639831543, "epoch": 5.39, "learning_rate": 2.304311073541843e-05, "loss": 0.6342, "step": 6378, "task_loss": 0.16487187147140503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7633986473083496, "epoch": 5.39, "learning_rate": 2.3038884192730347e-05, "loss": 0.5252, "step": 6379, "task_loss": 0.7553760409355164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.526973307132721, "epoch": 5.39, "learning_rate": 2.3034657650042266e-05, "loss": 0.4545, "step": 6380, "task_loss": 1.0537790060043335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5727398991584778, "epoch": 5.39, "learning_rate": 2.3030431107354186e-05, "loss": 0.4678, "step": 6381, "task_loss": 0.44452589750289917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30572181940078735, "epoch": 5.39, "learning_rate": 2.3026204564666102e-05, "loss": 0.3639, "step": 6382, "task_loss": 0.24077633023262024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44673413038253784, "epoch": 5.4, "learning_rate": 2.3021978021978022e-05, "loss": 0.4376, "step": 6383, "task_loss": 0.34365618228912354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4417877793312073, "epoch": 5.4, "learning_rate": 2.3017751479289942e-05, "loss": 0.5427, "step": 6384, "task_loss": 0.7422190308570862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9750477075576782, "epoch": 5.4, "learning_rate": 2.301352493660186e-05, "loss": 0.5902, "step": 6385, "task_loss": 1.2108874320983887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36852145195007324, "epoch": 5.4, "learning_rate": 2.300929839391378e-05, "loss": 0.4756, "step": 6386, "task_loss": 0.06643177568912506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2536177933216095, "epoch": 5.4, "learning_rate": 2.3005071851225698e-05, "loss": 0.5338, "step": 6387, "task_loss": 0.5156050324440002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6774274706840515, "epoch": 5.4, "learning_rate": 2.3000845308537617e-05, "loss": 0.5563, "step": 6388, "task_loss": 0.4827357828617096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4548439383506775, "epoch": 5.4, "learning_rate": 2.2996618765849537e-05, "loss": 0.4951, "step": 6389, "task_loss": 0.19972564280033112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6481550335884094, "epoch": 5.4, "learning_rate": 2.2992392223161454e-05, "loss": 0.5465, "step": 6390, "task_loss": 1.2912670373916626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.632424533367157, "epoch": 5.4, "learning_rate": 2.2988165680473377e-05, "loss": 0.4956, "step": 6391, "task_loss": 0.7639898657798767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47841742634773254, "epoch": 5.4, "learning_rate": 2.2983939137785293e-05, "loss": 0.4975, "step": 6392, "task_loss": 1.1369231939315796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4226456582546234, "epoch": 5.4, "learning_rate": 2.297971259509721e-05, "loss": 0.5512, "step": 6393, "task_loss": 0.4186021387577057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5733023881912231, "epoch": 5.4, "learning_rate": 2.2975486052409132e-05, "loss": 0.5018, "step": 6394, "task_loss": 0.7421029806137085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6429904699325562, "epoch": 5.41, "learning_rate": 2.297125950972105e-05, "loss": 0.4053, "step": 6395, "task_loss": 0.5905466079711914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5939153432846069, "epoch": 5.41, "learning_rate": 2.296703296703297e-05, "loss": 0.504, "step": 6396, "task_loss": 0.969499409198761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4579850137233734, "epoch": 5.41, "learning_rate": 2.2962806424344888e-05, "loss": 0.6099, "step": 6397, "task_loss": 1.0757176876068115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4377705454826355, "epoch": 5.41, "learning_rate": 2.2958579881656805e-05, "loss": 0.4037, "step": 6398, "task_loss": 0.11609987169504166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33455055952072144, "epoch": 5.41, "learning_rate": 2.2954353338968724e-05, "loss": 0.5197, "step": 6399, "task_loss": 0.9170243144035339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3963354229927063, "epoch": 5.41, "learning_rate": 2.2950126796280644e-05, "loss": 0.4247, "step": 6400, "task_loss": 0.5958443284034729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2776285409927368, "epoch": 5.41, "learning_rate": 2.294590025359256e-05, "loss": 0.4238, "step": 6401, "task_loss": 0.402399480342865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3305109143257141, "epoch": 5.41, "learning_rate": 2.2941673710904484e-05, "loss": 0.5602, "step": 6402, "task_loss": 0.2626878619194031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42549020051956177, "epoch": 5.41, "learning_rate": 2.29374471682164e-05, "loss": 0.5939, "step": 6403, "task_loss": 0.7085230350494385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3904922604560852, "epoch": 5.41, "learning_rate": 2.2933220625528316e-05, "loss": 0.3681, "step": 6404, "task_loss": 0.669359028339386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4589058458805084, "epoch": 5.41, "learning_rate": 2.292899408284024e-05, "loss": 0.4913, "step": 6405, "task_loss": 0.1431789994239807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2585071325302124, "epoch": 5.41, "learning_rate": 2.2924767540152156e-05, "loss": 0.4226, "step": 6406, "task_loss": 0.5096858143806458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6571400761604309, "epoch": 5.42, "learning_rate": 2.2920540997464076e-05, "loss": 0.5796, "step": 6407, "task_loss": 0.9306409955024719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5002869963645935, "epoch": 5.42, "learning_rate": 2.2916314454775995e-05, "loss": 0.362, "step": 6408, "task_loss": 0.377849280834198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27194857597351074, "epoch": 5.42, "learning_rate": 2.291208791208791e-05, "loss": 0.3585, "step": 6409, "task_loss": 0.12705577909946442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48667553067207336, "epoch": 5.42, "learning_rate": 2.290786136939983e-05, "loss": 0.4944, "step": 6410, "task_loss": 0.9912238121032715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6095097064971924, "epoch": 5.42, "learning_rate": 2.290363482671175e-05, "loss": 0.5013, "step": 6411, "task_loss": 0.6767289638519287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7189763784408569, "epoch": 5.42, "learning_rate": 2.2899408284023667e-05, "loss": 0.686, "step": 6412, "task_loss": 0.9101861119270325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4065517783164978, "epoch": 5.42, "learning_rate": 2.289518174133559e-05, "loss": 0.3953, "step": 6413, "task_loss": 0.7958052754402161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6466049551963806, "epoch": 5.42, "learning_rate": 2.2890955198647507e-05, "loss": 0.5393, "step": 6414, "task_loss": 1.3449606895446777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36048945784568787, "epoch": 5.42, "learning_rate": 2.2886728655959427e-05, "loss": 0.4303, "step": 6415, "task_loss": 0.3475267291069031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5222697257995605, "epoch": 5.42, "learning_rate": 2.2882502113271346e-05, "loss": 0.5119, "step": 6416, "task_loss": 2.133908271789551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.481108158826828, "epoch": 5.42, "learning_rate": 2.2878275570583263e-05, "loss": 0.5032, "step": 6417, "task_loss": 0.523410975933075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42595553398132324, "epoch": 5.42, "learning_rate": 2.2874049027895182e-05, "loss": 0.47, "step": 6418, "task_loss": 0.4274257719516754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3975520730018616, "epoch": 5.43, "learning_rate": 2.2869822485207102e-05, "loss": 0.4003, "step": 6419, "task_loss": 0.5490153431892395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18036630749702454, "epoch": 5.43, "learning_rate": 2.286559594251902e-05, "loss": 0.4487, "step": 6420, "task_loss": 0.05080006644129753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4209897518157959, "epoch": 5.43, "learning_rate": 2.286136939983094e-05, "loss": 0.441, "step": 6421, "task_loss": 1.0873464345932007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.790572464466095, "epoch": 5.43, "learning_rate": 2.2857142857142858e-05, "loss": 0.5389, "step": 6422, "task_loss": 0.38755011558532715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48761221766471863, "epoch": 5.43, "learning_rate": 2.2852916314454778e-05, "loss": 0.6122, "step": 6423, "task_loss": 0.9050610661506653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4309987723827362, "epoch": 5.43, "learning_rate": 2.2848689771766698e-05, "loss": 0.5794, "step": 6424, "task_loss": 0.9355601668357849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5323224067687988, "epoch": 5.43, "learning_rate": 2.2844463229078614e-05, "loss": 0.493, "step": 6425, "task_loss": 0.3420991599559784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.332425594329834, "epoch": 5.43, "learning_rate": 2.2840236686390534e-05, "loss": 0.5334, "step": 6426, "task_loss": 0.8157430291175842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5683882236480713, "epoch": 5.43, "learning_rate": 2.2836010143702453e-05, "loss": 0.5076, "step": 6427, "task_loss": 0.71019047498703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4162535071372986, "epoch": 5.43, "learning_rate": 2.2831783601014373e-05, "loss": 0.4171, "step": 6428, "task_loss": 0.23452575504779816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49199119210243225, "epoch": 5.43, "learning_rate": 2.282755705832629e-05, "loss": 0.5298, "step": 6429, "task_loss": 0.3094976544380188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3522765040397644, "epoch": 5.44, "learning_rate": 2.282333051563821e-05, "loss": 0.3936, "step": 6430, "task_loss": 0.35985374450683594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.336675226688385, "epoch": 5.44, "learning_rate": 2.281910397295013e-05, "loss": 0.5095, "step": 6431, "task_loss": 0.06399393081665039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3393029570579529, "epoch": 5.44, "learning_rate": 2.2814877430262045e-05, "loss": 0.3491, "step": 6432, "task_loss": 0.16699552536010742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36589372158050537, "epoch": 5.44, "learning_rate": 2.2810650887573965e-05, "loss": 0.414, "step": 6433, "task_loss": 0.5629777312278748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23177243769168854, "epoch": 5.44, "learning_rate": 2.2806424344885885e-05, "loss": 0.451, "step": 6434, "task_loss": 0.6587018966674805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38670265674591064, "epoch": 5.44, "learning_rate": 2.28021978021978e-05, "loss": 0.389, "step": 6435, "task_loss": 0.9098807573318481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31930696964263916, "epoch": 5.44, "learning_rate": 2.2797971259509724e-05, "loss": 0.3419, "step": 6436, "task_loss": 0.6778592467308044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3730270266532898, "epoch": 5.44, "learning_rate": 2.279374471682164e-05, "loss": 0.4483, "step": 6437, "task_loss": 0.8430075645446777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26599180698394775, "epoch": 5.44, "learning_rate": 2.278951817413356e-05, "loss": 0.4399, "step": 6438, "task_loss": 0.05670448765158653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4763574004173279, "epoch": 5.44, "learning_rate": 2.278529163144548e-05, "loss": 0.403, "step": 6439, "task_loss": 0.1193678081035614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.634314775466919, "epoch": 5.44, "learning_rate": 2.2781065088757396e-05, "loss": 0.6364, "step": 6440, "task_loss": 0.6560878157615662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4641571342945099, "epoch": 5.44, "learning_rate": 2.2776838546069316e-05, "loss": 0.4685, "step": 6441, "task_loss": 1.5128400325775146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4815780818462372, "epoch": 5.45, "learning_rate": 2.2772612003381236e-05, "loss": 0.4287, "step": 6442, "task_loss": 1.0178754329681396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5637037754058838, "epoch": 5.45, "learning_rate": 2.2768385460693152e-05, "loss": 0.5423, "step": 6443, "task_loss": 0.3484659790992737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4281385540962219, "epoch": 5.45, "learning_rate": 2.2764158918005075e-05, "loss": 0.4568, "step": 6444, "task_loss": 1.363336443901062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48757416009902954, "epoch": 5.45, "learning_rate": 2.2759932375316992e-05, "loss": 0.6214, "step": 6445, "task_loss": 0.47412046790122986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30364447832107544, "epoch": 5.45, "learning_rate": 2.2755705832628908e-05, "loss": 0.5425, "step": 6446, "task_loss": 0.5810065865516663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6672943830490112, "epoch": 5.45, "learning_rate": 2.275147928994083e-05, "loss": 0.6902, "step": 6447, "task_loss": 0.23105435073375702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2524298429489136, "epoch": 5.45, "learning_rate": 2.2747252747252748e-05, "loss": 0.4326, "step": 6448, "task_loss": 0.579328715801239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41499415040016174, "epoch": 5.45, "learning_rate": 2.2743026204564667e-05, "loss": 0.5255, "step": 6449, "task_loss": 0.5841841101646423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.4481174945831299, "epoch": 5.45, "learning_rate": 2.2738799661876587e-05, "loss": 0.6774, "step": 6450, "task_loss": 1.411600947380066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47486603260040283, "epoch": 5.45, "learning_rate": 2.2734573119188503e-05, "loss": 0.5062, "step": 6451, "task_loss": 0.6447728872299194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26596134901046753, "epoch": 5.45, "learning_rate": 2.2730346576500423e-05, "loss": 0.4111, "step": 6452, "task_loss": 0.30093953013420105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35910356044769287, "epoch": 5.45, "learning_rate": 2.2726120033812343e-05, "loss": 0.4036, "step": 6453, "task_loss": 0.8828761577606201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43394482135772705, "epoch": 5.46, "learning_rate": 2.272189349112426e-05, "loss": 0.5238, "step": 6454, "task_loss": 0.6709728240966797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6335655450820923, "epoch": 5.46, "learning_rate": 2.2717666948436182e-05, "loss": 0.5635, "step": 6455, "task_loss": 0.4928433895111084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4067826271057129, "epoch": 5.46, "learning_rate": 2.27134404057481e-05, "loss": 0.5514, "step": 6456, "task_loss": 0.576821506023407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3135641813278198, "epoch": 5.46, "learning_rate": 2.270921386306002e-05, "loss": 0.369, "step": 6457, "task_loss": 0.6777243614196777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.745387077331543, "epoch": 5.46, "learning_rate": 2.2704987320371938e-05, "loss": 0.4971, "step": 6458, "task_loss": 0.9218758940696716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4622109532356262, "epoch": 5.46, "learning_rate": 2.2700760777683854e-05, "loss": 0.577, "step": 6459, "task_loss": 0.5221714973449707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4771652817726135, "epoch": 5.46, "learning_rate": 2.2696534234995774e-05, "loss": 0.5078, "step": 6460, "task_loss": 0.6215834617614746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4503202736377716, "epoch": 5.46, "learning_rate": 2.2692307692307694e-05, "loss": 0.4179, "step": 6461, "task_loss": 1.5685396194458008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5257421135902405, "epoch": 5.46, "learning_rate": 2.268808114961961e-05, "loss": 0.3899, "step": 6462, "task_loss": 0.38245683908462524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39943039417266846, "epoch": 5.46, "learning_rate": 2.268385460693153e-05, "loss": 0.5771, "step": 6463, "task_loss": 0.45121529698371887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.540168046951294, "epoch": 5.46, "learning_rate": 2.267962806424345e-05, "loss": 0.5128, "step": 6464, "task_loss": 1.0453928709030151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30195993185043335, "epoch": 5.46, "learning_rate": 2.267540152155537e-05, "loss": 0.4729, "step": 6465, "task_loss": 1.2784377336502075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35184744000434875, "epoch": 5.47, "learning_rate": 2.267117497886729e-05, "loss": 0.4552, "step": 6466, "task_loss": 0.32141849398612976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36295440793037415, "epoch": 5.47, "learning_rate": 2.2666948436179206e-05, "loss": 0.4963, "step": 6467, "task_loss": 1.4009230136871338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23777607083320618, "epoch": 5.47, "learning_rate": 2.2662721893491125e-05, "loss": 0.4242, "step": 6468, "task_loss": 0.2750861644744873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7977490425109863, "epoch": 5.47, "learning_rate": 2.2658495350803045e-05, "loss": 0.6131, "step": 6469, "task_loss": 1.0260581970214844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41261765360832214, "epoch": 5.47, "learning_rate": 2.265426880811496e-05, "loss": 0.4611, "step": 6470, "task_loss": 0.2921566665172577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6386584043502808, "epoch": 5.47, "learning_rate": 2.265004226542688e-05, "loss": 0.5262, "step": 6471, "task_loss": 0.9740623235702515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6597335338592529, "epoch": 5.47, "learning_rate": 2.26458157227388e-05, "loss": 0.5375, "step": 6472, "task_loss": 0.4871094226837158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47870510816574097, "epoch": 5.47, "learning_rate": 2.264158918005072e-05, "loss": 0.4089, "step": 6473, "task_loss": 1.3056256771087646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41450589895248413, "epoch": 5.47, "learning_rate": 2.2637362637362637e-05, "loss": 0.5575, "step": 6474, "task_loss": 1.1317529678344727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2943868935108185, "epoch": 5.47, "learning_rate": 2.2633136094674557e-05, "loss": 0.4193, "step": 6475, "task_loss": 0.5493542551994324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.515693724155426, "epoch": 5.47, "learning_rate": 2.2628909551986476e-05, "loss": 0.4847, "step": 6476, "task_loss": 0.7433851361274719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5433123111724854, "epoch": 5.47, "learning_rate": 2.2624683009298396e-05, "loss": 0.6244, "step": 6477, "task_loss": 0.7271904349327087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5849413275718689, "epoch": 5.48, "learning_rate": 2.2620456466610313e-05, "loss": 0.6052, "step": 6478, "task_loss": 0.9450557827949524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3629588186740875, "epoch": 5.48, "learning_rate": 2.2616229923922232e-05, "loss": 0.385, "step": 6479, "task_loss": 0.15342813730239868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33997422456741333, "epoch": 5.48, "learning_rate": 2.2612003381234152e-05, "loss": 0.4322, "step": 6480, "task_loss": 0.2239326536655426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28108736872673035, "epoch": 5.48, "learning_rate": 2.2607776838546072e-05, "loss": 0.4186, "step": 6481, "task_loss": 0.5179224014282227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3189160227775574, "epoch": 5.48, "learning_rate": 2.2603550295857988e-05, "loss": 0.5085, "step": 6482, "task_loss": 0.9262568950653076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5939887762069702, "epoch": 5.48, "learning_rate": 2.2599323753169908e-05, "loss": 0.5062, "step": 6483, "task_loss": 1.4437553882598877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5564907789230347, "epoch": 5.48, "learning_rate": 2.2595097210481828e-05, "loss": 0.4729, "step": 6484, "task_loss": 1.0463241338729858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41006043553352356, "epoch": 5.48, "learning_rate": 2.2590870667793744e-05, "loss": 0.3606, "step": 6485, "task_loss": 1.1540050506591797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38574209809303284, "epoch": 5.48, "learning_rate": 2.2586644125105667e-05, "loss": 0.6044, "step": 6486, "task_loss": 0.33480316400527954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45560991764068604, "epoch": 5.48, "learning_rate": 2.2582417582417583e-05, "loss": 0.4519, "step": 6487, "task_loss": 0.7456464767456055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2866612374782562, "epoch": 5.48, "learning_rate": 2.2578191039729503e-05, "loss": 0.5121, "step": 6488, "task_loss": 0.02357068657875061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8256540298461914, "epoch": 5.48, "learning_rate": 2.2573964497041423e-05, "loss": 0.5862, "step": 6489, "task_loss": 1.5655720233917236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4515344500541687, "epoch": 5.49, "learning_rate": 2.256973795435334e-05, "loss": 0.4042, "step": 6490, "task_loss": 0.5995666980743408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3027622699737549, "epoch": 5.49, "learning_rate": 2.256551141166526e-05, "loss": 0.5452, "step": 6491, "task_loss": 1.830371379852295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4637233018875122, "epoch": 5.49, "learning_rate": 2.256128486897718e-05, "loss": 0.4726, "step": 6492, "task_loss": 0.8471509218215942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6204744577407837, "epoch": 5.49, "learning_rate": 2.2557058326289095e-05, "loss": 0.5464, "step": 6493, "task_loss": 0.5459375977516174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25900793075561523, "epoch": 5.49, "learning_rate": 2.2552831783601018e-05, "loss": 0.4367, "step": 6494, "task_loss": 0.804066002368927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4149196147918701, "epoch": 5.49, "learning_rate": 2.2548605240912935e-05, "loss": 0.448, "step": 6495, "task_loss": 0.5030509829521179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6226173639297485, "epoch": 5.49, "learning_rate": 2.254437869822485e-05, "loss": 0.5646, "step": 6496, "task_loss": 1.243924617767334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4392818808555603, "epoch": 5.49, "learning_rate": 2.2540152155536774e-05, "loss": 0.4997, "step": 6497, "task_loss": 0.5331965684890747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48184460401535034, "epoch": 5.49, "learning_rate": 2.253592561284869e-05, "loss": 0.4324, "step": 6498, "task_loss": 1.0983445644378662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5859066247940063, "epoch": 5.49, "learning_rate": 2.2531699070160607e-05, "loss": 0.5205, "step": 6499, "task_loss": 1.3031401634216309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5207016468048096, "epoch": 5.49, "learning_rate": 2.252747252747253e-05, "loss": 0.4405, "step": 6500, "task_loss": 0.810382068157196 }, { "epoch": 5.49, "eval_accuracy": 0.9071287128712872, "eval_loss": 0.31121769547462463, "eval_runtime": 230.0383, "eval_samples_per_second": 109.764, "eval_steps_per_second": 0.861, "step": 6500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25616952776908875, "epoch": 5.5, "learning_rate": 2.2523245984784446e-05, "loss": 0.4002, "step": 6501, "task_loss": 0.6353225708007812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36745402216911316, "epoch": 5.5, "learning_rate": 2.2519019442096366e-05, "loss": 0.4508, "step": 6502, "task_loss": 0.6705199480056763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2786528170108795, "epoch": 5.5, "learning_rate": 2.2514792899408286e-05, "loss": 0.5466, "step": 6503, "task_loss": 0.25337180495262146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5456166863441467, "epoch": 5.5, "learning_rate": 2.2510566356720202e-05, "loss": 0.5253, "step": 6504, "task_loss": 0.6149640679359436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5985019207000732, "epoch": 5.5, "learning_rate": 2.2506339814032125e-05, "loss": 0.4844, "step": 6505, "task_loss": 0.5040889382362366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4197734594345093, "epoch": 5.5, "learning_rate": 2.250211327134404e-05, "loss": 0.4802, "step": 6506, "task_loss": 0.9863604307174683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3264505863189697, "epoch": 5.5, "learning_rate": 2.2497886728655958e-05, "loss": 0.4333, "step": 6507, "task_loss": 0.5870899558067322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48236167430877686, "epoch": 5.5, "learning_rate": 2.249366018596788e-05, "loss": 0.4413, "step": 6508, "task_loss": 0.2647905945777893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5345989465713501, "epoch": 5.5, "learning_rate": 2.2489433643279797e-05, "loss": 0.5486, "step": 6509, "task_loss": 0.472306489944458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47498559951782227, "epoch": 5.5, "learning_rate": 2.2485207100591717e-05, "loss": 0.42, "step": 6510, "task_loss": 0.5003288388252258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3009692430496216, "epoch": 5.5, "learning_rate": 2.2480980557903637e-05, "loss": 0.3826, "step": 6511, "task_loss": 0.38763153553009033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4064667522907257, "epoch": 5.5, "learning_rate": 2.2476754015215553e-05, "loss": 0.5453, "step": 6512, "task_loss": 0.6678287386894226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5118426084518433, "epoch": 5.51, "learning_rate": 2.2472527472527473e-05, "loss": 0.4578, "step": 6513, "task_loss": 0.7778018712997437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4149852991104126, "epoch": 5.51, "learning_rate": 2.2468300929839393e-05, "loss": 0.586, "step": 6514, "task_loss": 0.5796111822128296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18042577803134918, "epoch": 5.51, "learning_rate": 2.2464074387151312e-05, "loss": 0.3526, "step": 6515, "task_loss": 0.4205179810523987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4085802733898163, "epoch": 5.51, "learning_rate": 2.245984784446323e-05, "loss": 0.3893, "step": 6516, "task_loss": 0.22515836358070374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3967309296131134, "epoch": 5.51, "learning_rate": 2.245562130177515e-05, "loss": 0.4296, "step": 6517, "task_loss": 1.1171687841415405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8369637727737427, "epoch": 5.51, "learning_rate": 2.2451394759087068e-05, "loss": 0.4437, "step": 6518, "task_loss": 0.5157053470611572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4168230891227722, "epoch": 5.51, "learning_rate": 2.2447168216398988e-05, "loss": 0.6323, "step": 6519, "task_loss": 0.486141562461853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5836775302886963, "epoch": 5.51, "learning_rate": 2.2442941673710904e-05, "loss": 0.5197, "step": 6520, "task_loss": 0.7917031049728394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5544719099998474, "epoch": 5.51, "learning_rate": 2.2438715131022824e-05, "loss": 0.4892, "step": 6521, "task_loss": 0.7600495219230652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28418242931365967, "epoch": 5.51, "learning_rate": 2.2434488588334744e-05, "loss": 0.3287, "step": 6522, "task_loss": 0.5098791122436523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4191673994064331, "epoch": 5.51, "learning_rate": 2.2430262045646664e-05, "loss": 0.4697, "step": 6523, "task_loss": 0.8762914538383484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41490814089775085, "epoch": 5.51, "learning_rate": 2.242603550295858e-05, "loss": 0.5809, "step": 6524, "task_loss": 0.2511128783226013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4627256989479065, "epoch": 5.52, "learning_rate": 2.24218089602705e-05, "loss": 0.4896, "step": 6525, "task_loss": 1.1867831945419312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4519067704677582, "epoch": 5.52, "learning_rate": 2.241758241758242e-05, "loss": 0.466, "step": 6526, "task_loss": 0.45990416407585144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2885536849498749, "epoch": 5.52, "learning_rate": 2.2413355874894336e-05, "loss": 0.3829, "step": 6527, "task_loss": 0.2812303900718689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6543110013008118, "epoch": 5.52, "learning_rate": 2.2409129332206255e-05, "loss": 0.5056, "step": 6528, "task_loss": 0.6204474568367004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4903867840766907, "epoch": 5.52, "learning_rate": 2.2404902789518175e-05, "loss": 0.477, "step": 6529, "task_loss": 0.31804174184799194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5285801887512207, "epoch": 5.52, "learning_rate": 2.2400676246830095e-05, "loss": 0.5004, "step": 6530, "task_loss": 0.8360893130302429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7006930708885193, "epoch": 5.52, "learning_rate": 2.2396449704142015e-05, "loss": 0.6388, "step": 6531, "task_loss": 1.1461968421936035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33204856514930725, "epoch": 5.52, "learning_rate": 2.239222316145393e-05, "loss": 0.3987, "step": 6532, "task_loss": 0.475440114736557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4505437910556793, "epoch": 5.52, "learning_rate": 2.238799661876585e-05, "loss": 0.4337, "step": 6533, "task_loss": 1.1317940950393677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21005533635616302, "epoch": 5.52, "learning_rate": 2.238377007607777e-05, "loss": 0.3488, "step": 6534, "task_loss": 0.0205268282443285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5093111395835876, "epoch": 5.52, "learning_rate": 2.2379543533389687e-05, "loss": 0.4081, "step": 6535, "task_loss": 0.7224283814430237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5326557159423828, "epoch": 5.52, "learning_rate": 2.2375316990701607e-05, "loss": 0.6215, "step": 6536, "task_loss": 0.4654531478881836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5011619925498962, "epoch": 5.53, "learning_rate": 2.2371090448013526e-05, "loss": 0.4852, "step": 6537, "task_loss": 0.4804128408432007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3525070250034332, "epoch": 5.53, "learning_rate": 2.2366863905325443e-05, "loss": 0.5048, "step": 6538, "task_loss": 0.975352942943573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3623230457305908, "epoch": 5.53, "learning_rate": 2.2362637362637366e-05, "loss": 0.433, "step": 6539, "task_loss": 0.3931300640106201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6981106996536255, "epoch": 5.53, "learning_rate": 2.2358410819949282e-05, "loss": 0.4037, "step": 6540, "task_loss": 1.199479579925537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4749060869216919, "epoch": 5.53, "learning_rate": 2.2354184277261202e-05, "loss": 0.5002, "step": 6541, "task_loss": 1.0661007165908813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32553720474243164, "epoch": 5.53, "learning_rate": 2.234995773457312e-05, "loss": 0.3613, "step": 6542, "task_loss": 0.11945799738168716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4936124086380005, "epoch": 5.53, "learning_rate": 2.2345731191885038e-05, "loss": 0.4778, "step": 6543, "task_loss": 0.5682523846626282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5230497717857361, "epoch": 5.53, "learning_rate": 2.2341504649196958e-05, "loss": 0.4574, "step": 6544, "task_loss": 0.9144527912139893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28021690249443054, "epoch": 5.53, "learning_rate": 2.2337278106508877e-05, "loss": 0.4136, "step": 6545, "task_loss": 0.5678066611289978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5350494980812073, "epoch": 5.53, "learning_rate": 2.2333051563820794e-05, "loss": 0.5417, "step": 6546, "task_loss": 1.476010799407959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3645980656147003, "epoch": 5.53, "learning_rate": 2.2328825021132717e-05, "loss": 0.5291, "step": 6547, "task_loss": 0.3196815252304077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22159670293331146, "epoch": 5.53, "learning_rate": 2.2324598478444633e-05, "loss": 0.3842, "step": 6548, "task_loss": 0.06847655028104782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.542724609375, "epoch": 5.54, "learning_rate": 2.232037193575655e-05, "loss": 0.5366, "step": 6549, "task_loss": 0.8789463043212891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5722472667694092, "epoch": 5.54, "learning_rate": 2.2316145393068473e-05, "loss": 0.5189, "step": 6550, "task_loss": 1.522711992263794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4096110463142395, "epoch": 5.54, "learning_rate": 2.231191885038039e-05, "loss": 0.4056, "step": 6551, "task_loss": 1.3393573760986328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.209906667470932, "epoch": 5.54, "learning_rate": 2.230769230769231e-05, "loss": 0.4308, "step": 6552, "task_loss": 0.4893885850906372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3922348618507385, "epoch": 5.54, "learning_rate": 2.230346576500423e-05, "loss": 0.5648, "step": 6553, "task_loss": 0.4997672736644745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49192938208580017, "epoch": 5.54, "learning_rate": 2.2299239222316145e-05, "loss": 0.5148, "step": 6554, "task_loss": 0.7971968054771423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48135387897491455, "epoch": 5.54, "learning_rate": 2.2295012679628065e-05, "loss": 0.4434, "step": 6555, "task_loss": 1.213742971420288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31976959109306335, "epoch": 5.54, "learning_rate": 2.2290786136939984e-05, "loss": 0.4681, "step": 6556, "task_loss": 0.4441589415073395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37154245376586914, "epoch": 5.54, "learning_rate": 2.22865595942519e-05, "loss": 0.433, "step": 6557, "task_loss": 1.0343756675720215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5619939565658569, "epoch": 5.54, "learning_rate": 2.2282333051563824e-05, "loss": 0.5246, "step": 6558, "task_loss": 0.6676798462867737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2783992886543274, "epoch": 5.54, "learning_rate": 2.227810650887574e-05, "loss": 0.3707, "step": 6559, "task_loss": 0.610399603843689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.769432783126831, "epoch": 5.54, "learning_rate": 2.227387996618766e-05, "loss": 0.4293, "step": 6560, "task_loss": 1.2190186977386475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5816839933395386, "epoch": 5.55, "learning_rate": 2.226965342349958e-05, "loss": 0.5375, "step": 6561, "task_loss": 0.6093623042106628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2690340280532837, "epoch": 5.55, "learning_rate": 2.2265426880811496e-05, "loss": 0.3431, "step": 6562, "task_loss": 0.23161719739437103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46483105421066284, "epoch": 5.55, "learning_rate": 2.2261200338123416e-05, "loss": 0.4134, "step": 6563, "task_loss": 0.7454406023025513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36716657876968384, "epoch": 5.55, "learning_rate": 2.2256973795435336e-05, "loss": 0.534, "step": 6564, "task_loss": 0.16079899668693542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.398648202419281, "epoch": 5.55, "learning_rate": 2.2252747252747252e-05, "loss": 0.4391, "step": 6565, "task_loss": 1.6294955015182495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44931724667549133, "epoch": 5.55, "learning_rate": 2.224852071005917e-05, "loss": 0.4155, "step": 6566, "task_loss": 0.5449956655502319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5374575853347778, "epoch": 5.55, "learning_rate": 2.224429416737109e-05, "loss": 0.3974, "step": 6567, "task_loss": 0.9460140466690063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48534244298934937, "epoch": 5.55, "learning_rate": 2.224006762468301e-05, "loss": 0.5355, "step": 6568, "task_loss": 0.6801758408546448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49755018949508667, "epoch": 5.55, "learning_rate": 2.223584108199493e-05, "loss": 0.5291, "step": 6569, "task_loss": 0.1559273600578308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6028162240982056, "epoch": 5.55, "learning_rate": 2.2231614539306847e-05, "loss": 0.5084, "step": 6570, "task_loss": 1.3433688879013062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6831220388412476, "epoch": 5.55, "learning_rate": 2.2227387996618767e-05, "loss": 0.6116, "step": 6571, "task_loss": 1.560497522354126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4108806848526001, "epoch": 5.56, "learning_rate": 2.2223161453930687e-05, "loss": 0.4632, "step": 6572, "task_loss": 1.8107231855392456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6039530634880066, "epoch": 5.56, "learning_rate": 2.2218934911242606e-05, "loss": 0.4891, "step": 6573, "task_loss": 0.44830143451690674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3714635968208313, "epoch": 5.56, "learning_rate": 2.2214708368554523e-05, "loss": 0.416, "step": 6574, "task_loss": 0.8233501315116882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4858923852443695, "epoch": 5.56, "learning_rate": 2.2210481825866443e-05, "loss": 0.4304, "step": 6575, "task_loss": 0.8596088886260986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43654894828796387, "epoch": 5.56, "learning_rate": 2.2206255283178362e-05, "loss": 0.5777, "step": 6576, "task_loss": 0.2936279773712158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4588269591331482, "epoch": 5.56, "learning_rate": 2.220202874049028e-05, "loss": 0.4125, "step": 6577, "task_loss": 0.40271538496017456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4263407588005066, "epoch": 5.56, "learning_rate": 2.21978021978022e-05, "loss": 0.4968, "step": 6578, "task_loss": 0.8342435956001282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3663595914840698, "epoch": 5.56, "learning_rate": 2.2193575655114118e-05, "loss": 0.3902, "step": 6579, "task_loss": 0.8362998366355896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4083057641983032, "epoch": 5.56, "learning_rate": 2.2189349112426034e-05, "loss": 0.3909, "step": 6580, "task_loss": 0.3547692894935608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36761537194252014, "epoch": 5.56, "learning_rate": 2.2185122569737958e-05, "loss": 0.4281, "step": 6581, "task_loss": 0.44335564970970154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6638447046279907, "epoch": 5.56, "learning_rate": 2.2180896027049874e-05, "loss": 0.5134, "step": 6582, "task_loss": 0.9176337718963623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42400234937667847, "epoch": 5.56, "learning_rate": 2.2176669484361794e-05, "loss": 0.4589, "step": 6583, "task_loss": 0.7746252417564392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2727625370025635, "epoch": 5.57, "learning_rate": 2.2172442941673713e-05, "loss": 0.3334, "step": 6584, "task_loss": 0.03718046471476555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3818608820438385, "epoch": 5.57, "learning_rate": 2.216821639898563e-05, "loss": 0.3817, "step": 6585, "task_loss": 1.5395042896270752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3122103810310364, "epoch": 5.57, "learning_rate": 2.216398985629755e-05, "loss": 0.3716, "step": 6586, "task_loss": 0.4920465350151062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18556299805641174, "epoch": 5.57, "learning_rate": 2.215976331360947e-05, "loss": 0.3941, "step": 6587, "task_loss": 0.07084954530000687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39843857288360596, "epoch": 5.57, "learning_rate": 2.2155536770921386e-05, "loss": 0.393, "step": 6588, "task_loss": 0.4581957757472992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33707818388938904, "epoch": 5.57, "learning_rate": 2.215131022823331e-05, "loss": 0.491, "step": 6589, "task_loss": 0.5638668537139893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5662754774093628, "epoch": 5.57, "learning_rate": 2.2147083685545225e-05, "loss": 0.4797, "step": 6590, "task_loss": 0.8410687446594238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45930880308151245, "epoch": 5.57, "learning_rate": 2.214285714285714e-05, "loss": 0.5129, "step": 6591, "task_loss": 0.4605422616004944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5784786939620972, "epoch": 5.57, "learning_rate": 2.2138630600169065e-05, "loss": 0.467, "step": 6592, "task_loss": 0.5142861008644104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5326275825500488, "epoch": 5.57, "learning_rate": 2.213440405748098e-05, "loss": 0.4969, "step": 6593, "task_loss": 0.8516028523445129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3323522210121155, "epoch": 5.57, "learning_rate": 2.21301775147929e-05, "loss": 0.4285, "step": 6594, "task_loss": 0.49355900287628174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45413345098495483, "epoch": 5.57, "learning_rate": 2.212595097210482e-05, "loss": 0.5078, "step": 6595, "task_loss": 0.13283292949199677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47557783126831055, "epoch": 5.58, "learning_rate": 2.2121724429416737e-05, "loss": 0.5227, "step": 6596, "task_loss": 0.8164372444152832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21432280540466309, "epoch": 5.58, "learning_rate": 2.2117497886728656e-05, "loss": 0.4463, "step": 6597, "task_loss": 0.3244723379611969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3982103765010834, "epoch": 5.58, "learning_rate": 2.2113271344040576e-05, "loss": 0.488, "step": 6598, "task_loss": 0.7477995157241821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3384738266468048, "epoch": 5.58, "learning_rate": 2.2109044801352493e-05, "loss": 0.3274, "step": 6599, "task_loss": 0.23857639729976654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6250789165496826, "epoch": 5.58, "learning_rate": 2.2104818258664416e-05, "loss": 0.423, "step": 6600, "task_loss": 0.3815581500530243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7946285009384155, "epoch": 5.58, "learning_rate": 2.2100591715976332e-05, "loss": 0.6503, "step": 6601, "task_loss": 1.7416517734527588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3971802294254303, "epoch": 5.58, "learning_rate": 2.2096365173288252e-05, "loss": 0.4016, "step": 6602, "task_loss": 0.41994473338127136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6118603944778442, "epoch": 5.58, "learning_rate": 2.209213863060017e-05, "loss": 0.5329, "step": 6603, "task_loss": 1.5953171253204346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2875477075576782, "epoch": 5.58, "learning_rate": 2.2087912087912088e-05, "loss": 0.3788, "step": 6604, "task_loss": 0.3744802176952362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4883019030094147, "epoch": 5.58, "learning_rate": 2.2083685545224008e-05, "loss": 0.5034, "step": 6605, "task_loss": 1.2311573028564453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2491014003753662, "epoch": 5.58, "learning_rate": 2.2079459002535927e-05, "loss": 0.4192, "step": 6606, "task_loss": 0.23697152733802795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.314663827419281, "epoch": 5.58, "learning_rate": 2.2075232459847844e-05, "loss": 0.3892, "step": 6607, "task_loss": 0.34976285696029663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2968905568122864, "epoch": 5.59, "learning_rate": 2.2071005917159763e-05, "loss": 0.4612, "step": 6608, "task_loss": 0.2096678763628006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4578056335449219, "epoch": 5.59, "learning_rate": 2.2066779374471683e-05, "loss": 0.5863, "step": 6609, "task_loss": 1.4398854970932007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8692323565483093, "epoch": 5.59, "learning_rate": 2.2062552831783603e-05, "loss": 0.599, "step": 6610, "task_loss": 1.1830549240112305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2681110203266144, "epoch": 5.59, "learning_rate": 2.2058326289095523e-05, "loss": 0.4221, "step": 6611, "task_loss": 1.8927868604660034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5832124352455139, "epoch": 5.59, "learning_rate": 2.205409974640744e-05, "loss": 0.4791, "step": 6612, "task_loss": 0.3963264524936676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20971204340457916, "epoch": 5.59, "learning_rate": 2.204987320371936e-05, "loss": 0.3889, "step": 6613, "task_loss": 0.5461903214454651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36383718252182007, "epoch": 5.59, "learning_rate": 2.204564666103128e-05, "loss": 0.4383, "step": 6614, "task_loss": 0.39409035444259644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.464749813079834, "epoch": 5.59, "learning_rate": 2.2041420118343195e-05, "loss": 0.4723, "step": 6615, "task_loss": 1.0040212869644165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4018401503562927, "epoch": 5.59, "learning_rate": 2.2037193575655115e-05, "loss": 0.3811, "step": 6616, "task_loss": 0.9969697594642639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2552361488342285, "epoch": 5.59, "learning_rate": 2.2032967032967034e-05, "loss": 0.3915, "step": 6617, "task_loss": 0.28578609228134155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2928164601325989, "epoch": 5.59, "learning_rate": 2.2028740490278954e-05, "loss": 0.3754, "step": 6618, "task_loss": 0.14997069537639618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.363754540681839, "epoch": 5.59, "learning_rate": 2.202451394759087e-05, "loss": 0.3904, "step": 6619, "task_loss": 1.4418221712112427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28526636958122253, "epoch": 5.6, "learning_rate": 2.202028740490279e-05, "loss": 0.3534, "step": 6620, "task_loss": 0.6053158640861511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41791990399360657, "epoch": 5.6, "learning_rate": 2.201606086221471e-05, "loss": 0.493, "step": 6621, "task_loss": 0.4252471923828125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5734915733337402, "epoch": 5.6, "learning_rate": 2.201183431952663e-05, "loss": 0.5535, "step": 6622, "task_loss": 1.1274453401565552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7606749534606934, "epoch": 5.6, "learning_rate": 2.2007607776838546e-05, "loss": 0.5223, "step": 6623, "task_loss": 1.050026297569275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7250608205795288, "epoch": 5.6, "learning_rate": 2.2003381234150466e-05, "loss": 0.6297, "step": 6624, "task_loss": 1.1712957620620728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19725146889686584, "epoch": 5.6, "learning_rate": 2.1999154691462385e-05, "loss": 0.5004, "step": 6625, "task_loss": 0.015571881085634232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3152746260166168, "epoch": 5.6, "learning_rate": 2.1994928148774305e-05, "loss": 0.418, "step": 6626, "task_loss": 0.4012804627418518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5508754849433899, "epoch": 5.6, "learning_rate": 2.199070160608622e-05, "loss": 0.4614, "step": 6627, "task_loss": 1.1642802953720093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23977942764759064, "epoch": 5.6, "learning_rate": 2.198647506339814e-05, "loss": 0.4739, "step": 6628, "task_loss": 0.2495940923690796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9456596970558167, "epoch": 5.6, "learning_rate": 2.198224852071006e-05, "loss": 0.5896, "step": 6629, "task_loss": 0.8423975110054016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2918052077293396, "epoch": 5.6, "learning_rate": 2.1978021978021977e-05, "loss": 0.3528, "step": 6630, "task_loss": 0.3776776194572449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28707656264305115, "epoch": 5.6, "learning_rate": 2.19737954353339e-05, "loss": 0.4453, "step": 6631, "task_loss": 1.297120213508606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5698080658912659, "epoch": 5.61, "learning_rate": 2.1969568892645817e-05, "loss": 0.4705, "step": 6632, "task_loss": 0.23067864775657654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22129428386688232, "epoch": 5.61, "learning_rate": 2.1965342349957737e-05, "loss": 0.6486, "step": 6633, "task_loss": 0.6887525320053101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3690264821052551, "epoch": 5.61, "learning_rate": 2.1961115807269656e-05, "loss": 0.5817, "step": 6634, "task_loss": 0.44805046916007996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3376394212245941, "epoch": 5.61, "learning_rate": 2.1956889264581573e-05, "loss": 0.4408, "step": 6635, "task_loss": 0.1119861900806427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7915014028549194, "epoch": 5.61, "learning_rate": 2.1952662721893492e-05, "loss": 0.7133, "step": 6636, "task_loss": 0.950564444065094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3085026443004608, "epoch": 5.61, "learning_rate": 2.1948436179205412e-05, "loss": 0.3289, "step": 6637, "task_loss": 0.2138867974281311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3445605933666229, "epoch": 5.61, "learning_rate": 2.194420963651733e-05, "loss": 0.461, "step": 6638, "task_loss": 0.5891308188438416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29952695965766907, "epoch": 5.61, "learning_rate": 2.193998309382925e-05, "loss": 0.4931, "step": 6639, "task_loss": 0.08761756122112274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3112938404083252, "epoch": 5.61, "learning_rate": 2.1935756551141168e-05, "loss": 0.4814, "step": 6640, "task_loss": 0.38097134232521057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43494877219200134, "epoch": 5.61, "learning_rate": 2.1931530008453084e-05, "loss": 0.5909, "step": 6641, "task_loss": 0.4314068555831909 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21665450930595398, "epoch": 5.61, "learning_rate": 2.1927303465765007e-05, "loss": 0.5129, "step": 6642, "task_loss": 0.19272123277187347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6129412055015564, "epoch": 5.61, "learning_rate": 2.1923076923076924e-05, "loss": 0.524, "step": 6643, "task_loss": 0.5507757067680359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4304733872413635, "epoch": 5.62, "learning_rate": 2.191885038038884e-05, "loss": 0.4757, "step": 6644, "task_loss": 0.08917589485645294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7696310877799988, "epoch": 5.62, "learning_rate": 2.1914623837700763e-05, "loss": 0.6087, "step": 6645, "task_loss": 0.7796250581741333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2461678385734558, "epoch": 5.62, "learning_rate": 2.191039729501268e-05, "loss": 0.3226, "step": 6646, "task_loss": 0.26112043857574463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45307403802871704, "epoch": 5.62, "learning_rate": 2.19061707523246e-05, "loss": 0.4588, "step": 6647, "task_loss": 1.579825758934021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3606126606464386, "epoch": 5.62, "learning_rate": 2.190194420963652e-05, "loss": 0.3957, "step": 6648, "task_loss": 0.1721055656671524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5496014952659607, "epoch": 5.62, "learning_rate": 2.1897717666948435e-05, "loss": 0.3768, "step": 6649, "task_loss": 0.41457489132881165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29964929819107056, "epoch": 5.62, "learning_rate": 2.189349112426036e-05, "loss": 0.4423, "step": 6650, "task_loss": 0.5744560360908508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5012792348861694, "epoch": 5.62, "learning_rate": 2.1889264581572275e-05, "loss": 0.4371, "step": 6651, "task_loss": 0.7494077682495117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33123207092285156, "epoch": 5.62, "learning_rate": 2.188503803888419e-05, "loss": 0.37, "step": 6652, "task_loss": 0.6639485359191895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3170870244503021, "epoch": 5.62, "learning_rate": 2.1880811496196114e-05, "loss": 0.5049, "step": 6653, "task_loss": 0.918781042098999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28564170002937317, "epoch": 5.62, "learning_rate": 2.187658495350803e-05, "loss": 0.3822, "step": 6654, "task_loss": 0.5689250826835632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5809497833251953, "epoch": 5.63, "learning_rate": 2.187235841081995e-05, "loss": 0.5303, "step": 6655, "task_loss": 1.0988904237747192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49017542600631714, "epoch": 5.63, "learning_rate": 2.186813186813187e-05, "loss": 0.4152, "step": 6656, "task_loss": 1.0502938032150269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43486344814300537, "epoch": 5.63, "learning_rate": 2.1863905325443787e-05, "loss": 0.6119, "step": 6657, "task_loss": 0.5644630193710327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44015204906463623, "epoch": 5.63, "learning_rate": 2.1859678782755706e-05, "loss": 0.4648, "step": 6658, "task_loss": 0.3661627769470215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2525050640106201, "epoch": 5.63, "learning_rate": 2.1855452240067626e-05, "loss": 0.384, "step": 6659, "task_loss": 0.28871649503707886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46284550428390503, "epoch": 5.63, "learning_rate": 2.1851225697379546e-05, "loss": 0.4914, "step": 6660, "task_loss": 1.477583646774292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3007497191429138, "epoch": 5.63, "learning_rate": 2.1846999154691462e-05, "loss": 0.3943, "step": 6661, "task_loss": 0.6044033765792847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.547548770904541, "epoch": 5.63, "learning_rate": 2.1842772612003382e-05, "loss": 0.5052, "step": 6662, "task_loss": 1.746382713317871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43696457147598267, "epoch": 5.63, "learning_rate": 2.18385460693153e-05, "loss": 0.359, "step": 6663, "task_loss": 0.7883840799331665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3060758709907532, "epoch": 5.63, "learning_rate": 2.183431952662722e-05, "loss": 0.3919, "step": 6664, "task_loss": 1.4962902069091797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.620410680770874, "epoch": 5.63, "learning_rate": 2.1830092983939138e-05, "loss": 0.6356, "step": 6665, "task_loss": 0.6708673238754272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22259894013404846, "epoch": 5.63, "learning_rate": 2.1825866441251057e-05, "loss": 0.3349, "step": 6666, "task_loss": 0.40206316113471985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.514568567276001, "epoch": 5.64, "learning_rate": 2.1821639898562977e-05, "loss": 0.4667, "step": 6667, "task_loss": 0.8175463676452637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.385628879070282, "epoch": 5.64, "learning_rate": 2.1817413355874897e-05, "loss": 0.3695, "step": 6668, "task_loss": 0.24502016603946686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26250022649765015, "epoch": 5.64, "learning_rate": 2.1813186813186813e-05, "loss": 0.3639, "step": 6669, "task_loss": 0.37060174345970154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44420403242111206, "epoch": 5.64, "learning_rate": 2.1808960270498733e-05, "loss": 0.5238, "step": 6670, "task_loss": 1.528491735458374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3467496335506439, "epoch": 5.64, "learning_rate": 2.1804733727810653e-05, "loss": 0.5599, "step": 6671, "task_loss": 0.5854769945144653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5276918411254883, "epoch": 5.64, "learning_rate": 2.180050718512257e-05, "loss": 0.4836, "step": 6672, "task_loss": 0.46222230792045593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7202828526496887, "epoch": 5.64, "learning_rate": 2.179628064243449e-05, "loss": 0.5274, "step": 6673, "task_loss": 1.1488972902297974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4792698621749878, "epoch": 5.64, "learning_rate": 2.179205409974641e-05, "loss": 0.5381, "step": 6674, "task_loss": 1.0096802711486816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4209651052951813, "epoch": 5.64, "learning_rate": 2.1787827557058328e-05, "loss": 0.389, "step": 6675, "task_loss": 0.3032013773918152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5430549383163452, "epoch": 5.64, "learning_rate": 2.1783601014370248e-05, "loss": 0.4336, "step": 6676, "task_loss": 0.7982643246650696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4977704882621765, "epoch": 5.64, "learning_rate": 2.1779374471682164e-05, "loss": 0.5614, "step": 6677, "task_loss": 1.145737648010254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3327661454677582, "epoch": 5.64, "learning_rate": 2.1775147928994084e-05, "loss": 0.4002, "step": 6678, "task_loss": 0.2778187692165375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24384522438049316, "epoch": 5.65, "learning_rate": 2.1770921386306004e-05, "loss": 0.3485, "step": 6679, "task_loss": 0.1743202954530716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35648736357688904, "epoch": 5.65, "learning_rate": 2.176669484361792e-05, "loss": 0.4728, "step": 6680, "task_loss": 0.9142789244651794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44687050580978394, "epoch": 5.65, "learning_rate": 2.176246830092984e-05, "loss": 0.3833, "step": 6681, "task_loss": 0.18046848475933075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3163347840309143, "epoch": 5.65, "learning_rate": 2.175824175824176e-05, "loss": 0.472, "step": 6682, "task_loss": 0.6492880582809448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5634894967079163, "epoch": 5.65, "learning_rate": 2.1754015215553676e-05, "loss": 0.5677, "step": 6683, "task_loss": 0.5417191386222839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3187306821346283, "epoch": 5.65, "learning_rate": 2.17497886728656e-05, "loss": 0.4391, "step": 6684, "task_loss": 0.3030279576778412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5657342672348022, "epoch": 5.65, "learning_rate": 2.1745562130177516e-05, "loss": 0.447, "step": 6685, "task_loss": 1.1252855062484741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.525389552116394, "epoch": 5.65, "learning_rate": 2.1741335587489435e-05, "loss": 0.5207, "step": 6686, "task_loss": 0.9672444462776184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30847275257110596, "epoch": 5.65, "learning_rate": 2.1737109044801355e-05, "loss": 0.3749, "step": 6687, "task_loss": 0.31510409712791443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5977796316146851, "epoch": 5.65, "learning_rate": 2.173288250211327e-05, "loss": 0.4986, "step": 6688, "task_loss": 0.9500839114189148 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3775804042816162, "epoch": 5.65, "learning_rate": 2.172865595942519e-05, "loss": 0.4134, "step": 6689, "task_loss": 0.7767316699028015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2688702940940857, "epoch": 5.65, "learning_rate": 2.172442941673711e-05, "loss": 0.3116, "step": 6690, "task_loss": 0.45763349533081055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4715948700904846, "epoch": 5.66, "learning_rate": 2.1720202874049027e-05, "loss": 0.6495, "step": 6691, "task_loss": 1.0293368101119995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5192683935165405, "epoch": 5.66, "learning_rate": 2.171597633136095e-05, "loss": 0.4939, "step": 6692, "task_loss": 1.4563164710998535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3596770465373993, "epoch": 5.66, "learning_rate": 2.1711749788672867e-05, "loss": 0.4439, "step": 6693, "task_loss": 0.3766735792160034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3958856463432312, "epoch": 5.66, "learning_rate": 2.1707523245984783e-05, "loss": 0.4512, "step": 6694, "task_loss": 0.9399265646934509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46372485160827637, "epoch": 5.66, "learning_rate": 2.1703296703296706e-05, "loss": 0.6369, "step": 6695, "task_loss": 1.3706023693084717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5102686285972595, "epoch": 5.66, "learning_rate": 2.1699070160608622e-05, "loss": 0.3982, "step": 6696, "task_loss": 0.4204097092151642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5004092454910278, "epoch": 5.66, "learning_rate": 2.1694843617920542e-05, "loss": 0.496, "step": 6697, "task_loss": 0.7308244109153748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31620413064956665, "epoch": 5.66, "learning_rate": 2.1690617075232462e-05, "loss": 0.4924, "step": 6698, "task_loss": 0.5250855684280396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29264551401138306, "epoch": 5.66, "learning_rate": 2.1686390532544378e-05, "loss": 0.3504, "step": 6699, "task_loss": 0.5557460784912109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4742107391357422, "epoch": 5.66, "learning_rate": 2.1682163989856298e-05, "loss": 0.5024, "step": 6700, "task_loss": 1.3855253458023071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5794758200645447, "epoch": 5.66, "learning_rate": 2.1677937447168218e-05, "loss": 0.4461, "step": 6701, "task_loss": 0.6162325143814087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5781143307685852, "epoch": 5.66, "learning_rate": 2.1673710904480134e-05, "loss": 0.4309, "step": 6702, "task_loss": 0.8657614588737488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2678454518318176, "epoch": 5.67, "learning_rate": 2.1669484361792057e-05, "loss": 0.4695, "step": 6703, "task_loss": 0.5560580492019653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46910807490348816, "epoch": 5.67, "learning_rate": 2.1665257819103974e-05, "loss": 0.4655, "step": 6704, "task_loss": 1.2586930990219116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5803413987159729, "epoch": 5.67, "learning_rate": 2.1661031276415893e-05, "loss": 0.5281, "step": 6705, "task_loss": 1.010599970817566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7397748827934265, "epoch": 5.67, "learning_rate": 2.1656804733727813e-05, "loss": 0.481, "step": 6706, "task_loss": 0.1796511709690094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7339752316474915, "epoch": 5.67, "learning_rate": 2.165257819103973e-05, "loss": 0.5426, "step": 6707, "task_loss": 1.0069605112075806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5188617706298828, "epoch": 5.67, "learning_rate": 2.164835164835165e-05, "loss": 0.5592, "step": 6708, "task_loss": 1.3485506772994995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3367837071418762, "epoch": 5.67, "learning_rate": 2.164412510566357e-05, "loss": 0.3851, "step": 6709, "task_loss": 0.1405501812696457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40066462755203247, "epoch": 5.67, "learning_rate": 2.1639898562975485e-05, "loss": 0.5281, "step": 6710, "task_loss": 0.7205643653869629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6344140768051147, "epoch": 5.67, "learning_rate": 2.1635672020287405e-05, "loss": 0.4494, "step": 6711, "task_loss": 0.378339558839798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.298669695854187, "epoch": 5.67, "learning_rate": 2.1631445477599325e-05, "loss": 0.4326, "step": 6712, "task_loss": 0.6739474534988403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22347085177898407, "epoch": 5.67, "learning_rate": 2.1627218934911244e-05, "loss": 0.486, "step": 6713, "task_loss": 0.29230520129203796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31795555353164673, "epoch": 5.67, "learning_rate": 2.1622992392223164e-05, "loss": 0.3697, "step": 6714, "task_loss": 0.36035627126693726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4322434663772583, "epoch": 5.68, "learning_rate": 2.161876584953508e-05, "loss": 0.4574, "step": 6715, "task_loss": 1.0636188983917236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41897836327552795, "epoch": 5.68, "learning_rate": 2.1614539306847e-05, "loss": 0.3685, "step": 6716, "task_loss": 0.5359217524528503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.256425678730011, "epoch": 5.68, "learning_rate": 2.161031276415892e-05, "loss": 0.4938, "step": 6717, "task_loss": 0.9868853092193604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4227105975151062, "epoch": 5.68, "learning_rate": 2.160608622147084e-05, "loss": 0.4819, "step": 6718, "task_loss": 1.48383367061615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2603036165237427, "epoch": 5.68, "learning_rate": 2.1601859678782756e-05, "loss": 0.471, "step": 6719, "task_loss": 0.04105174541473389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6495502591133118, "epoch": 5.68, "learning_rate": 2.1597633136094676e-05, "loss": 0.5038, "step": 6720, "task_loss": 1.2899093627929688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6795089840888977, "epoch": 5.68, "learning_rate": 2.1593406593406596e-05, "loss": 0.4908, "step": 6721, "task_loss": 0.6083970665931702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2990046739578247, "epoch": 5.68, "learning_rate": 2.1589180050718512e-05, "loss": 0.5593, "step": 6722, "task_loss": 0.30463266372680664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7162476778030396, "epoch": 5.68, "learning_rate": 2.1584953508030432e-05, "loss": 0.634, "step": 6723, "task_loss": 1.206944465637207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45765915513038635, "epoch": 5.68, "learning_rate": 2.158072696534235e-05, "loss": 0.5834, "step": 6724, "task_loss": 0.6975574493408203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4896770417690277, "epoch": 5.68, "learning_rate": 2.1576500422654268e-05, "loss": 0.5133, "step": 6725, "task_loss": 0.5666850209236145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.51052325963974, "epoch": 5.69, "learning_rate": 2.157227387996619e-05, "loss": 0.534, "step": 6726, "task_loss": 0.6694901585578918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7746536731719971, "epoch": 5.69, "learning_rate": 2.1568047337278107e-05, "loss": 0.5707, "step": 6727, "task_loss": 0.9090960025787354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5497757196426392, "epoch": 5.69, "learning_rate": 2.1563820794590027e-05, "loss": 0.5242, "step": 6728, "task_loss": 0.44954121112823486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4458220899105072, "epoch": 5.69, "learning_rate": 2.1559594251901947e-05, "loss": 0.5551, "step": 6729, "task_loss": 0.3808966279029846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5431913733482361, "epoch": 5.69, "learning_rate": 2.1555367709213863e-05, "loss": 0.3506, "step": 6730, "task_loss": 0.5021058320999146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4136313199996948, "epoch": 5.69, "learning_rate": 2.1551141166525783e-05, "loss": 0.5544, "step": 6731, "task_loss": 1.0348055362701416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3025673031806946, "epoch": 5.69, "learning_rate": 2.1546914623837703e-05, "loss": 0.4732, "step": 6732, "task_loss": 0.940022349357605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3571837544441223, "epoch": 5.69, "learning_rate": 2.154268808114962e-05, "loss": 0.4813, "step": 6733, "task_loss": 1.0127058029174805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.488209068775177, "epoch": 5.69, "learning_rate": 2.1538461538461542e-05, "loss": 0.4574, "step": 6734, "task_loss": 0.7513001561164856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5237274169921875, "epoch": 5.69, "learning_rate": 2.153423499577346e-05, "loss": 0.4568, "step": 6735, "task_loss": 0.37515881657600403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3149186074733734, "epoch": 5.69, "learning_rate": 2.1530008453085375e-05, "loss": 0.6078, "step": 6736, "task_loss": 0.5907328128814697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4140559434890747, "epoch": 5.69, "learning_rate": 2.1525781910397298e-05, "loss": 0.4045, "step": 6737, "task_loss": 1.326681137084961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30922675132751465, "epoch": 5.7, "learning_rate": 2.1521555367709214e-05, "loss": 0.5436, "step": 6738, "task_loss": 1.0444397926330566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38426321744918823, "epoch": 5.7, "learning_rate": 2.1517328825021134e-05, "loss": 0.3692, "step": 6739, "task_loss": 0.2168019860982895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48237109184265137, "epoch": 5.7, "learning_rate": 2.1513102282333054e-05, "loss": 0.4087, "step": 6740, "task_loss": 0.8307868242263794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5990522503852844, "epoch": 5.7, "learning_rate": 2.150887573964497e-05, "loss": 0.4345, "step": 6741, "task_loss": 0.8067715167999268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36341625452041626, "epoch": 5.7, "learning_rate": 2.150464919695689e-05, "loss": 0.5185, "step": 6742, "task_loss": 0.6057892441749573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3511134386062622, "epoch": 5.7, "learning_rate": 2.150042265426881e-05, "loss": 0.4416, "step": 6743, "task_loss": 0.8138015270233154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3959570527076721, "epoch": 5.7, "learning_rate": 2.1496196111580726e-05, "loss": 0.3349, "step": 6744, "task_loss": 0.4190218448638916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4763460159301758, "epoch": 5.7, "learning_rate": 2.149196956889265e-05, "loss": 0.4539, "step": 6745, "task_loss": 0.4817401170730591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28733617067337036, "epoch": 5.7, "learning_rate": 2.1487743026204565e-05, "loss": 0.3561, "step": 6746, "task_loss": 0.46150100231170654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43958157300949097, "epoch": 5.7, "learning_rate": 2.1483516483516482e-05, "loss": 0.4914, "step": 6747, "task_loss": 0.5022943019866943 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4857657551765442, "epoch": 5.7, "learning_rate": 2.1479289940828405e-05, "loss": 0.5228, "step": 6748, "task_loss": 0.24779477715492249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5735419988632202, "epoch": 5.7, "learning_rate": 2.147506339814032e-05, "loss": 0.4484, "step": 6749, "task_loss": 0.8466213345527649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4814309775829315, "epoch": 5.71, "learning_rate": 2.147083685545224e-05, "loss": 0.3724, "step": 6750, "task_loss": 0.41916024684906006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3465757966041565, "epoch": 5.71, "learning_rate": 2.146661031276416e-05, "loss": 0.4241, "step": 6751, "task_loss": 0.44277143478393555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3248085379600525, "epoch": 5.71, "learning_rate": 2.1462383770076077e-05, "loss": 0.3383, "step": 6752, "task_loss": 1.2061140537261963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46878859400749207, "epoch": 5.71, "learning_rate": 2.1458157227387997e-05, "loss": 0.4509, "step": 6753, "task_loss": 0.6678359508514404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4628848135471344, "epoch": 5.71, "learning_rate": 2.1453930684699916e-05, "loss": 0.4079, "step": 6754, "task_loss": 1.001604676246643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3515365421772003, "epoch": 5.71, "learning_rate": 2.1449704142011836e-05, "loss": 0.4458, "step": 6755, "task_loss": 0.6357600092887878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5633479952812195, "epoch": 5.71, "learning_rate": 2.1445477599323756e-05, "loss": 0.5436, "step": 6756, "task_loss": 0.24646206200122833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7011735439300537, "epoch": 5.71, "learning_rate": 2.1441251056635672e-05, "loss": 0.5626, "step": 6757, "task_loss": 1.4723469018936157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4954965114593506, "epoch": 5.71, "learning_rate": 2.1437024513947592e-05, "loss": 0.4289, "step": 6758, "task_loss": 0.8803597688674927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41444939374923706, "epoch": 5.71, "learning_rate": 2.1432797971259512e-05, "loss": 0.5857, "step": 6759, "task_loss": 0.8552736639976501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48859903216362, "epoch": 5.71, "learning_rate": 2.1428571428571428e-05, "loss": 0.5045, "step": 6760, "task_loss": 0.5711913108825684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6026777029037476, "epoch": 5.71, "learning_rate": 2.1424344885883348e-05, "loss": 0.5605, "step": 6761, "task_loss": 0.3474797308444977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3489863872528076, "epoch": 5.72, "learning_rate": 2.1420118343195268e-05, "loss": 0.3644, "step": 6762, "task_loss": 0.7322254180908203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5694568753242493, "epoch": 5.72, "learning_rate": 2.1415891800507187e-05, "loss": 0.4569, "step": 6763, "task_loss": 1.4226588010787964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3010168969631195, "epoch": 5.72, "learning_rate": 2.1411665257819104e-05, "loss": 0.4259, "step": 6764, "task_loss": 0.6803930401802063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2720373272895813, "epoch": 5.72, "learning_rate": 2.1407438715131023e-05, "loss": 0.4787, "step": 6765, "task_loss": 0.19917823374271393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6226383447647095, "epoch": 5.72, "learning_rate": 2.1403212172442943e-05, "loss": 0.4914, "step": 6766, "task_loss": 1.3299155235290527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3875640034675598, "epoch": 5.72, "learning_rate": 2.1398985629754863e-05, "loss": 0.4624, "step": 6767, "task_loss": 0.22936205565929413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25801533460617065, "epoch": 5.72, "learning_rate": 2.139475908706678e-05, "loss": 0.4108, "step": 6768, "task_loss": 1.0187067985534668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5304945111274719, "epoch": 5.72, "learning_rate": 2.13905325443787e-05, "loss": 0.4396, "step": 6769, "task_loss": 0.7897787690162659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3565604090690613, "epoch": 5.72, "learning_rate": 2.138630600169062e-05, "loss": 0.415, "step": 6770, "task_loss": 0.25789934396743774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32732632756233215, "epoch": 5.72, "learning_rate": 2.138207945900254e-05, "loss": 0.4699, "step": 6771, "task_loss": 0.6669847369194031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6157450079917908, "epoch": 5.72, "learning_rate": 2.1377852916314455e-05, "loss": 0.4144, "step": 6772, "task_loss": 0.7623788714408875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36396968364715576, "epoch": 5.72, "learning_rate": 2.1373626373626375e-05, "loss": 0.4009, "step": 6773, "task_loss": 0.3830263912677765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48826244473457336, "epoch": 5.73, "learning_rate": 2.1369399830938294e-05, "loss": 0.5586, "step": 6774, "task_loss": 0.3352643847465515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3475797176361084, "epoch": 5.73, "learning_rate": 2.136517328825021e-05, "loss": 0.4314, "step": 6775, "task_loss": 0.15374507009983063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5560595989227295, "epoch": 5.73, "learning_rate": 2.136094674556213e-05, "loss": 0.4307, "step": 6776, "task_loss": 0.48674294352531433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34720557928085327, "epoch": 5.73, "learning_rate": 2.135672020287405e-05, "loss": 0.4748, "step": 6777, "task_loss": 0.4600304067134857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2592812180519104, "epoch": 5.73, "learning_rate": 2.135249366018597e-05, "loss": 0.3788, "step": 6778, "task_loss": 0.42032235860824585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4969314634799957, "epoch": 5.73, "learning_rate": 2.134826711749789e-05, "loss": 0.4663, "step": 6779, "task_loss": 0.9224324226379395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39084574580192566, "epoch": 5.73, "learning_rate": 2.1344040574809806e-05, "loss": 0.5807, "step": 6780, "task_loss": 0.17408879101276398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6009945869445801, "epoch": 5.73, "learning_rate": 2.1339814032121726e-05, "loss": 0.5261, "step": 6781, "task_loss": 1.31087064743042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19805112481117249, "epoch": 5.73, "learning_rate": 2.1335587489433645e-05, "loss": 0.4153, "step": 6782, "task_loss": 0.5363072156906128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19592398405075073, "epoch": 5.73, "learning_rate": 2.1331360946745562e-05, "loss": 0.4602, "step": 6783, "task_loss": 0.7822631001472473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5393253564834595, "epoch": 5.73, "learning_rate": 2.1327134404057485e-05, "loss": 0.4271, "step": 6784, "task_loss": 0.3242488503456116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33584100008010864, "epoch": 5.73, "learning_rate": 2.13229078613694e-05, "loss": 0.3779, "step": 6785, "task_loss": 0.40616145730018616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4789864718914032, "epoch": 5.74, "learning_rate": 2.1318681318681318e-05, "loss": 0.4589, "step": 6786, "task_loss": 0.8307963013648987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48436540365219116, "epoch": 5.74, "learning_rate": 2.131445477599324e-05, "loss": 0.6355, "step": 6787, "task_loss": 0.4870648980140686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6092596054077148, "epoch": 5.74, "learning_rate": 2.1310228233305157e-05, "loss": 0.5109, "step": 6788, "task_loss": 0.5954377055168152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2594757080078125, "epoch": 5.74, "learning_rate": 2.1306001690617073e-05, "loss": 0.5333, "step": 6789, "task_loss": 0.15603889524936676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.525944709777832, "epoch": 5.74, "learning_rate": 2.1301775147928997e-05, "loss": 0.5111, "step": 6790, "task_loss": 0.35330891609191895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40124428272247314, "epoch": 5.74, "learning_rate": 2.1297548605240913e-05, "loss": 0.5026, "step": 6791, "task_loss": 0.849157452583313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4003504514694214, "epoch": 5.74, "learning_rate": 2.1293322062552833e-05, "loss": 0.683, "step": 6792, "task_loss": 0.7268747091293335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42773720622062683, "epoch": 5.74, "learning_rate": 2.1289095519864752e-05, "loss": 0.4814, "step": 6793, "task_loss": 0.32103240489959717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5112349390983582, "epoch": 5.74, "learning_rate": 2.128486897717667e-05, "loss": 0.4346, "step": 6794, "task_loss": 0.5772743225097656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5459302067756653, "epoch": 5.74, "learning_rate": 2.128064243448859e-05, "loss": 0.4874, "step": 6795, "task_loss": 1.653991460800171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40330782532691956, "epoch": 5.74, "learning_rate": 2.1276415891800508e-05, "loss": 0.4902, "step": 6796, "task_loss": 0.4302142262458801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3176397383213043, "epoch": 5.75, "learning_rate": 2.1272189349112425e-05, "loss": 0.396, "step": 6797, "task_loss": 0.28809553384780884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6232177019119263, "epoch": 5.75, "learning_rate": 2.1267962806424348e-05, "loss": 0.5835, "step": 6798, "task_loss": 0.3648402988910675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7750144004821777, "epoch": 5.75, "learning_rate": 2.1263736263736264e-05, "loss": 0.5831, "step": 6799, "task_loss": 0.5778810977935791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22234627604484558, "epoch": 5.75, "learning_rate": 2.1259509721048184e-05, "loss": 0.4266, "step": 6800, "task_loss": 0.44662272930145264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6292132139205933, "epoch": 5.75, "learning_rate": 2.1255283178360104e-05, "loss": 0.532, "step": 6801, "task_loss": 1.0702412128448486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9979573488235474, "epoch": 5.75, "learning_rate": 2.125105663567202e-05, "loss": 0.4921, "step": 6802, "task_loss": 0.6450396776199341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31618595123291016, "epoch": 5.75, "learning_rate": 2.124683009298394e-05, "loss": 0.4398, "step": 6803, "task_loss": 0.39285609126091003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39941421151161194, "epoch": 5.75, "learning_rate": 2.124260355029586e-05, "loss": 0.3983, "step": 6804, "task_loss": 0.250224232673645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5914482474327087, "epoch": 5.75, "learning_rate": 2.1238377007607776e-05, "loss": 0.5198, "step": 6805, "task_loss": 1.0823137760162354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34581875801086426, "epoch": 5.75, "learning_rate": 2.1234150464919695e-05, "loss": 0.3962, "step": 6806, "task_loss": 1.0628114938735962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6719406843185425, "epoch": 5.75, "learning_rate": 2.1229923922231615e-05, "loss": 0.5499, "step": 6807, "task_loss": 0.465394526720047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5483078360557556, "epoch": 5.75, "learning_rate": 2.1225697379543535e-05, "loss": 0.623, "step": 6808, "task_loss": 1.5166280269622803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5266494154930115, "epoch": 5.76, "learning_rate": 2.1221470836855455e-05, "loss": 0.4517, "step": 6809, "task_loss": 0.45015373826026917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6125856041908264, "epoch": 5.76, "learning_rate": 2.121724429416737e-05, "loss": 0.4907, "step": 6810, "task_loss": 0.13124988973140717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4500581622123718, "epoch": 5.76, "learning_rate": 2.121301775147929e-05, "loss": 0.5281, "step": 6811, "task_loss": 0.13881105184555054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2730002999305725, "epoch": 5.76, "learning_rate": 2.120879120879121e-05, "loss": 0.3554, "step": 6812, "task_loss": 0.5143589973449707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2998513877391815, "epoch": 5.76, "learning_rate": 2.120456466610313e-05, "loss": 0.4916, "step": 6813, "task_loss": 0.7103821635246277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2632826566696167, "epoch": 5.76, "learning_rate": 2.1200338123415047e-05, "loss": 0.2724, "step": 6814, "task_loss": 0.657924473285675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5065885782241821, "epoch": 5.76, "learning_rate": 2.1196111580726966e-05, "loss": 0.4249, "step": 6815, "task_loss": 1.1604305505752563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3876535892486572, "epoch": 5.76, "learning_rate": 2.1191885038038886e-05, "loss": 0.4899, "step": 6816, "task_loss": 0.879410445690155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5263018608093262, "epoch": 5.76, "learning_rate": 2.1187658495350802e-05, "loss": 0.573, "step": 6817, "task_loss": 1.6745277643203735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37311917543411255, "epoch": 5.76, "learning_rate": 2.1183431952662722e-05, "loss": 0.4211, "step": 6818, "task_loss": 0.46173784136772156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5550713539123535, "epoch": 5.76, "learning_rate": 2.1179205409974642e-05, "loss": 0.4666, "step": 6819, "task_loss": 0.998883843421936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6994566321372986, "epoch": 5.76, "learning_rate": 2.117497886728656e-05, "loss": 0.5161, "step": 6820, "task_loss": 0.5035641193389893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14464640617370605, "epoch": 5.77, "learning_rate": 2.117075232459848e-05, "loss": 0.4098, "step": 6821, "task_loss": 0.3938554525375366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21956972777843475, "epoch": 5.77, "learning_rate": 2.1166525781910398e-05, "loss": 0.4419, "step": 6822, "task_loss": 0.08559399098157883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3366870582103729, "epoch": 5.77, "learning_rate": 2.1162299239222317e-05, "loss": 0.5934, "step": 6823, "task_loss": 0.6493999361991882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19049642980098724, "epoch": 5.77, "learning_rate": 2.1158072696534237e-05, "loss": 0.3677, "step": 6824, "task_loss": 0.0458822026848793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5027066469192505, "epoch": 5.77, "learning_rate": 2.1153846153846154e-05, "loss": 0.4415, "step": 6825, "task_loss": 1.1761420965194702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33831173181533813, "epoch": 5.77, "learning_rate": 2.1149619611158073e-05, "loss": 0.4057, "step": 6826, "task_loss": 0.47489920258522034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6495150327682495, "epoch": 5.77, "learning_rate": 2.1145393068469993e-05, "loss": 0.5032, "step": 6827, "task_loss": 0.2836555540561676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.739142656326294, "epoch": 5.77, "learning_rate": 2.114116652578191e-05, "loss": 0.5101, "step": 6828, "task_loss": 0.9493443369865417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4600968658924103, "epoch": 5.77, "learning_rate": 2.1136939983093833e-05, "loss": 0.487, "step": 6829, "task_loss": 0.22272975742816925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4258127510547638, "epoch": 5.77, "learning_rate": 2.113271344040575e-05, "loss": 0.5072, "step": 6830, "task_loss": 0.09586464613676071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41531744599342346, "epoch": 5.77, "learning_rate": 2.112848689771767e-05, "loss": 0.5008, "step": 6831, "task_loss": 0.41358157992362976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.0277512073516846, "epoch": 5.77, "learning_rate": 2.112426035502959e-05, "loss": 0.6109, "step": 6832, "task_loss": 1.2728232145309448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3542889952659607, "epoch": 5.78, "learning_rate": 2.1120033812341505e-05, "loss": 0.5373, "step": 6833, "task_loss": 1.1528089046478271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5336217284202576, "epoch": 5.78, "learning_rate": 2.1115807269653424e-05, "loss": 0.4377, "step": 6834, "task_loss": 0.7546467781066895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3307652771472931, "epoch": 5.78, "learning_rate": 2.1111580726965344e-05, "loss": 0.4796, "step": 6835, "task_loss": 0.6476584672927856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4327883720397949, "epoch": 5.78, "learning_rate": 2.110735418427726e-05, "loss": 0.462, "step": 6836, "task_loss": 0.2784275710582733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24504417181015015, "epoch": 5.78, "learning_rate": 2.1103127641589184e-05, "loss": 0.4415, "step": 6837, "task_loss": 0.560055136680603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4066360890865326, "epoch": 5.78, "learning_rate": 2.10989010989011e-05, "loss": 0.6412, "step": 6838, "task_loss": 0.6960163712501526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5056667327880859, "epoch": 5.78, "learning_rate": 2.1094674556213016e-05, "loss": 0.381, "step": 6839, "task_loss": 1.0946102142333984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7472718358039856, "epoch": 5.78, "learning_rate": 2.109044801352494e-05, "loss": 0.5438, "step": 6840, "task_loss": 1.369212031364441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3987520635128021, "epoch": 5.78, "learning_rate": 2.1086221470836856e-05, "loss": 0.4121, "step": 6841, "task_loss": 0.6876682639122009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2448863387107849, "epoch": 5.78, "learning_rate": 2.1081994928148776e-05, "loss": 0.4465, "step": 6842, "task_loss": 0.3026348352432251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6523715257644653, "epoch": 5.78, "learning_rate": 2.1077768385460695e-05, "loss": 0.5269, "step": 6843, "task_loss": 0.2225721776485443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5302640795707703, "epoch": 5.78, "learning_rate": 2.107354184277261e-05, "loss": 0.4623, "step": 6844, "task_loss": 1.5482683181762695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7110546827316284, "epoch": 5.79, "learning_rate": 2.106931530008453e-05, "loss": 0.4753, "step": 6845, "task_loss": 0.47345587611198425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3790833652019501, "epoch": 5.79, "learning_rate": 2.106508875739645e-05, "loss": 0.3771, "step": 6846, "task_loss": 0.20857547223567963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3964225649833679, "epoch": 5.79, "learning_rate": 2.1060862214708367e-05, "loss": 0.4294, "step": 6847, "task_loss": 0.14243261516094208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41636669635772705, "epoch": 5.79, "learning_rate": 2.105663567202029e-05, "loss": 0.4275, "step": 6848, "task_loss": 0.566111147403717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48904043436050415, "epoch": 5.79, "learning_rate": 2.1052409129332207e-05, "loss": 0.3679, "step": 6849, "task_loss": 1.3460726737976074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35177889466285706, "epoch": 5.79, "learning_rate": 2.1048182586644127e-05, "loss": 0.3922, "step": 6850, "task_loss": 0.29798099398612976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5769786834716797, "epoch": 5.79, "learning_rate": 2.1043956043956046e-05, "loss": 0.4667, "step": 6851, "task_loss": 0.3875521719455719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31912124156951904, "epoch": 5.79, "learning_rate": 2.1039729501267963e-05, "loss": 0.4313, "step": 6852, "task_loss": 0.32382670044898987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38843244314193726, "epoch": 5.79, "learning_rate": 2.1035502958579883e-05, "loss": 0.4252, "step": 6853, "task_loss": 1.0679205656051636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3662281334400177, "epoch": 5.79, "learning_rate": 2.1031276415891802e-05, "loss": 0.5041, "step": 6854, "task_loss": 0.6846098303794861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5711381435394287, "epoch": 5.79, "learning_rate": 2.102704987320372e-05, "loss": 0.5762, "step": 6855, "task_loss": 0.9002973437309265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5546905994415283, "epoch": 5.79, "learning_rate": 2.102282333051564e-05, "loss": 0.3673, "step": 6856, "task_loss": 0.6081010103225708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9307261109352112, "epoch": 5.8, "learning_rate": 2.1018596787827558e-05, "loss": 0.5543, "step": 6857, "task_loss": 1.546901822090149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8297597169876099, "epoch": 5.8, "learning_rate": 2.1014370245139478e-05, "loss": 0.6043, "step": 6858, "task_loss": 1.2760603427886963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.324114590883255, "epoch": 5.8, "learning_rate": 2.1010143702451394e-05, "loss": 0.4176, "step": 6859, "task_loss": 0.6934968829154968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4704315662384033, "epoch": 5.8, "learning_rate": 2.1005917159763314e-05, "loss": 0.4553, "step": 6860, "task_loss": 0.8126189708709717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24674999713897705, "epoch": 5.8, "learning_rate": 2.1001690617075234e-05, "loss": 0.3397, "step": 6861, "task_loss": 0.5640186071395874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5420013666152954, "epoch": 5.8, "learning_rate": 2.0997464074387153e-05, "loss": 0.4965, "step": 6862, "task_loss": 1.0041961669921875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5000835061073303, "epoch": 5.8, "learning_rate": 2.099323753169907e-05, "loss": 0.4163, "step": 6863, "task_loss": 1.16087007522583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6482635736465454, "epoch": 5.8, "learning_rate": 2.098901098901099e-05, "loss": 0.4986, "step": 6864, "task_loss": 0.6807727813720703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15824642777442932, "epoch": 5.8, "learning_rate": 2.098478444632291e-05, "loss": 0.4103, "step": 6865, "task_loss": 0.007307421416044235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5773793458938599, "epoch": 5.8, "learning_rate": 2.098055790363483e-05, "loss": 0.4147, "step": 6866, "task_loss": 0.7449727654457092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3035845160484314, "epoch": 5.8, "learning_rate": 2.0976331360946745e-05, "loss": 0.5636, "step": 6867, "task_loss": 0.35404837131500244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4838506877422333, "epoch": 5.81, "learning_rate": 2.0972104818258665e-05, "loss": 0.4828, "step": 6868, "task_loss": 1.5215617418289185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.417070209980011, "epoch": 5.81, "learning_rate": 2.0967878275570585e-05, "loss": 0.4846, "step": 6869, "task_loss": 0.8895341157913208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6175264120101929, "epoch": 5.81, "learning_rate": 2.09636517328825e-05, "loss": 0.4822, "step": 6870, "task_loss": 0.7183728814125061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4873811602592468, "epoch": 5.81, "learning_rate": 2.0959425190194424e-05, "loss": 0.4678, "step": 6871, "task_loss": 0.606157660484314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5087961554527283, "epoch": 5.81, "learning_rate": 2.095519864750634e-05, "loss": 0.4193, "step": 6872, "task_loss": 1.1622354984283447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5356823801994324, "epoch": 5.81, "learning_rate": 2.095097210481826e-05, "loss": 0.487, "step": 6873, "task_loss": 0.3554946482181549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29089847207069397, "epoch": 5.81, "learning_rate": 2.094674556213018e-05, "loss": 0.324, "step": 6874, "task_loss": 0.3924228250980377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24230298399925232, "epoch": 5.81, "learning_rate": 2.0942519019442096e-05, "loss": 0.4417, "step": 6875, "task_loss": 0.322633296251297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7427131533622742, "epoch": 5.81, "learning_rate": 2.0938292476754016e-05, "loss": 0.513, "step": 6876, "task_loss": 0.43294665217399597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41000860929489136, "epoch": 5.81, "learning_rate": 2.0934065934065936e-05, "loss": 0.3547, "step": 6877, "task_loss": 0.518623411655426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32185161113739014, "epoch": 5.81, "learning_rate": 2.0929839391377852e-05, "loss": 0.4657, "step": 6878, "task_loss": 0.6284551620483398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3538426160812378, "epoch": 5.81, "learning_rate": 2.0925612848689775e-05, "loss": 0.5171, "step": 6879, "task_loss": 0.8997548818588257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43647468090057373, "epoch": 5.82, "learning_rate": 2.0921386306001692e-05, "loss": 0.4572, "step": 6880, "task_loss": 0.1140788197517395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30595213174819946, "epoch": 5.82, "learning_rate": 2.0917159763313608e-05, "loss": 0.4401, "step": 6881, "task_loss": 0.7451620101928711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33447811007499695, "epoch": 5.82, "learning_rate": 2.091293322062553e-05, "loss": 0.4045, "step": 6882, "task_loss": 0.7599992156028748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4289398491382599, "epoch": 5.82, "learning_rate": 2.0908706677937448e-05, "loss": 0.4473, "step": 6883, "task_loss": 0.5802550315856934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4537455439567566, "epoch": 5.82, "learning_rate": 2.0904480135249367e-05, "loss": 0.3273, "step": 6884, "task_loss": 0.8239174485206604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23544958233833313, "epoch": 5.82, "learning_rate": 2.0900253592561287e-05, "loss": 0.3563, "step": 6885, "task_loss": 0.09476306289434433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3304581046104431, "epoch": 5.82, "learning_rate": 2.0896027049873203e-05, "loss": 0.4708, "step": 6886, "task_loss": 0.8620635867118835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6766111254692078, "epoch": 5.82, "learning_rate": 2.0891800507185123e-05, "loss": 0.5107, "step": 6887, "task_loss": 1.0529582500457764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30495965480804443, "epoch": 5.82, "learning_rate": 2.0887573964497043e-05, "loss": 0.3258, "step": 6888, "task_loss": 0.3499838709831238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6093863844871521, "epoch": 5.82, "learning_rate": 2.088334742180896e-05, "loss": 0.4456, "step": 6889, "task_loss": 0.4287920594215393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3391032814979553, "epoch": 5.82, "learning_rate": 2.0879120879120882e-05, "loss": 0.4857, "step": 6890, "task_loss": 0.9017887115478516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36752891540527344, "epoch": 5.82, "learning_rate": 2.08748943364328e-05, "loss": 0.4111, "step": 6891, "task_loss": 0.4394024908542633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3980378806591034, "epoch": 5.83, "learning_rate": 2.0870667793744715e-05, "loss": 0.4146, "step": 6892, "task_loss": 0.021777141839265823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3142988681793213, "epoch": 5.83, "learning_rate": 2.0866441251056638e-05, "loss": 0.4208, "step": 6893, "task_loss": 0.27094897627830505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4553554356098175, "epoch": 5.83, "learning_rate": 2.0862214708368555e-05, "loss": 0.5404, "step": 6894, "task_loss": 1.224256157875061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2626907527446747, "epoch": 5.83, "learning_rate": 2.0857988165680474e-05, "loss": 0.3815, "step": 6895, "task_loss": 0.855850100517273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.531815230846405, "epoch": 5.83, "learning_rate": 2.0853761622992394e-05, "loss": 0.5996, "step": 6896, "task_loss": 0.5966977477073669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31711798906326294, "epoch": 5.83, "learning_rate": 2.084953508030431e-05, "loss": 0.4046, "step": 6897, "task_loss": 0.6825155019760132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38439205288887024, "epoch": 5.83, "learning_rate": 2.084530853761623e-05, "loss": 0.5293, "step": 6898, "task_loss": 0.8456814885139465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15853556990623474, "epoch": 5.83, "learning_rate": 2.084108199492815e-05, "loss": 0.3849, "step": 6899, "task_loss": 0.06816264241933823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5972846746444702, "epoch": 5.83, "learning_rate": 2.083685545224007e-05, "loss": 0.4852, "step": 6900, "task_loss": 1.376851201057434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.455689936876297, "epoch": 5.83, "learning_rate": 2.083262890955199e-05, "loss": 0.4725, "step": 6901, "task_loss": 0.5702486038208008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46228501200675964, "epoch": 5.83, "learning_rate": 2.0828402366863906e-05, "loss": 0.4756, "step": 6902, "task_loss": 0.4654257893562317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3355134129524231, "epoch": 5.83, "learning_rate": 2.0824175824175825e-05, "loss": 0.4657, "step": 6903, "task_loss": 1.098803162574768 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2770090103149414, "epoch": 5.84, "learning_rate": 2.0819949281487745e-05, "loss": 0.3887, "step": 6904, "task_loss": 0.6073124408721924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2100699096918106, "epoch": 5.84, "learning_rate": 2.081572273879966e-05, "loss": 0.3216, "step": 6905, "task_loss": 0.21566464006900787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29921281337738037, "epoch": 5.84, "learning_rate": 2.081149619611158e-05, "loss": 0.3604, "step": 6906, "task_loss": 0.45757758617401123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25455623865127563, "epoch": 5.84, "learning_rate": 2.08072696534235e-05, "loss": 0.3524, "step": 6907, "task_loss": 0.07059229165315628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5089364051818848, "epoch": 5.84, "learning_rate": 2.080304311073542e-05, "loss": 0.4101, "step": 6908, "task_loss": 0.5950535535812378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44800323247909546, "epoch": 5.84, "learning_rate": 2.0798816568047337e-05, "loss": 0.4616, "step": 6909, "task_loss": 0.80549156665802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3771620988845825, "epoch": 5.84, "learning_rate": 2.0794590025359257e-05, "loss": 0.4364, "step": 6910, "task_loss": 1.1688623428344727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.546317458152771, "epoch": 5.84, "learning_rate": 2.0790363482671177e-05, "loss": 0.5008, "step": 6911, "task_loss": 0.38493847846984863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48115646839141846, "epoch": 5.84, "learning_rate": 2.0786136939983096e-05, "loss": 0.5804, "step": 6912, "task_loss": 1.0825543403625488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44323229789733887, "epoch": 5.84, "learning_rate": 2.0781910397295013e-05, "loss": 0.462, "step": 6913, "task_loss": 1.199125051498413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.680584192276001, "epoch": 5.84, "learning_rate": 2.0777683854606932e-05, "loss": 0.4516, "step": 6914, "task_loss": 0.4622798562049866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29810604453086853, "epoch": 5.84, "learning_rate": 2.0773457311918852e-05, "loss": 0.4058, "step": 6915, "task_loss": 0.6172472238540649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6477153301239014, "epoch": 5.85, "learning_rate": 2.0769230769230772e-05, "loss": 0.4511, "step": 6916, "task_loss": 0.7039207816123962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23615384101867676, "epoch": 5.85, "learning_rate": 2.0765004226542688e-05, "loss": 0.362, "step": 6917, "task_loss": 0.24976134300231934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43328922986984253, "epoch": 5.85, "learning_rate": 2.0760777683854608e-05, "loss": 0.5557, "step": 6918, "task_loss": 0.6124016642570496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.678520679473877, "epoch": 5.85, "learning_rate": 2.0756551141166528e-05, "loss": 0.5384, "step": 6919, "task_loss": 0.5957140922546387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5203990340232849, "epoch": 5.85, "learning_rate": 2.0752324598478444e-05, "loss": 0.4904, "step": 6920, "task_loss": 1.1569639444351196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.555241048336029, "epoch": 5.85, "learning_rate": 2.0748098055790364e-05, "loss": 0.4127, "step": 6921, "task_loss": 0.5554826855659485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3438481092453003, "epoch": 5.85, "learning_rate": 2.0743871513102283e-05, "loss": 0.4762, "step": 6922, "task_loss": 0.9312421679496765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6765172481536865, "epoch": 5.85, "learning_rate": 2.07396449704142e-05, "loss": 0.4982, "step": 6923, "task_loss": 0.7163085341453552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33975645899772644, "epoch": 5.85, "learning_rate": 2.0735418427726123e-05, "loss": 0.3875, "step": 6924, "task_loss": 0.3946218192577362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4212139844894409, "epoch": 5.85, "learning_rate": 2.073119188503804e-05, "loss": 0.5296, "step": 6925, "task_loss": 0.18994076550006866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2789541482925415, "epoch": 5.85, "learning_rate": 2.072696534234996e-05, "loss": 0.3817, "step": 6926, "task_loss": 0.21240371465682983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8152321577072144, "epoch": 5.85, "learning_rate": 2.072273879966188e-05, "loss": 0.6065, "step": 6927, "task_loss": 0.381613552570343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4205235242843628, "epoch": 5.86, "learning_rate": 2.0718512256973795e-05, "loss": 0.4891, "step": 6928, "task_loss": 0.697672426700592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2164396047592163, "epoch": 5.86, "learning_rate": 2.0714285714285718e-05, "loss": 0.4187, "step": 6929, "task_loss": 0.35104691982269287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5050743818283081, "epoch": 5.86, "learning_rate": 2.0710059171597635e-05, "loss": 0.4805, "step": 6930, "task_loss": 0.3537675142288208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.831802248954773, "epoch": 5.86, "learning_rate": 2.070583262890955e-05, "loss": 0.6558, "step": 6931, "task_loss": 1.1591541767120361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39277392625808716, "epoch": 5.86, "learning_rate": 2.0701606086221474e-05, "loss": 0.5059, "step": 6932, "task_loss": 1.1958526372909546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4644114673137665, "epoch": 5.86, "learning_rate": 2.069737954353339e-05, "loss": 0.5094, "step": 6933, "task_loss": 0.4394320249557495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.415141761302948, "epoch": 5.86, "learning_rate": 2.0693153000845307e-05, "loss": 0.482, "step": 6934, "task_loss": 0.2355649620294571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5091354846954346, "epoch": 5.86, "learning_rate": 2.068892645815723e-05, "loss": 0.3388, "step": 6935, "task_loss": 0.8844378590583801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6866999268531799, "epoch": 5.86, "learning_rate": 2.0684699915469146e-05, "loss": 0.4656, "step": 6936, "task_loss": 0.6222223043441772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4982926547527313, "epoch": 5.86, "learning_rate": 2.0680473372781066e-05, "loss": 0.4589, "step": 6937, "task_loss": 0.4388187527656555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3379089832305908, "epoch": 5.86, "learning_rate": 2.0676246830092986e-05, "loss": 0.3769, "step": 6938, "task_loss": 0.06551823765039444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2937958240509033, "epoch": 5.87, "learning_rate": 2.0672020287404902e-05, "loss": 0.3521, "step": 6939, "task_loss": 0.5402923822402954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5408083200454712, "epoch": 5.87, "learning_rate": 2.0667793744716822e-05, "loss": 0.46, "step": 6940, "task_loss": 0.49334830045700073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34332144260406494, "epoch": 5.87, "learning_rate": 2.066356720202874e-05, "loss": 0.4943, "step": 6941, "task_loss": 0.8118143677711487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4961376190185547, "epoch": 5.87, "learning_rate": 2.0659340659340658e-05, "loss": 0.513, "step": 6942, "task_loss": 0.12239488959312439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48297691345214844, "epoch": 5.87, "learning_rate": 2.065511411665258e-05, "loss": 0.5205, "step": 6943, "task_loss": 0.24743522703647614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3032165467739105, "epoch": 5.87, "learning_rate": 2.0650887573964497e-05, "loss": 0.4141, "step": 6944, "task_loss": 0.8442246317863464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2457922101020813, "epoch": 5.87, "learning_rate": 2.0646661031276417e-05, "loss": 0.4021, "step": 6945, "task_loss": 0.9032883644104004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2674260437488556, "epoch": 5.87, "learning_rate": 2.0642434488588337e-05, "loss": 0.4044, "step": 6946, "task_loss": 0.4631931781768799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5523655414581299, "epoch": 5.87, "learning_rate": 2.0638207945900253e-05, "loss": 0.4694, "step": 6947, "task_loss": 0.5869618058204651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5839467644691467, "epoch": 5.87, "learning_rate": 2.0633981403212173e-05, "loss": 0.5043, "step": 6948, "task_loss": 1.1385022401809692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2565206289291382, "epoch": 5.87, "learning_rate": 2.0629754860524093e-05, "loss": 0.3683, "step": 6949, "task_loss": 0.17148961126804352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6339231729507446, "epoch": 5.87, "learning_rate": 2.062552831783601e-05, "loss": 0.5042, "step": 6950, "task_loss": 0.5123355984687805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5382684469223022, "epoch": 5.88, "learning_rate": 2.062130177514793e-05, "loss": 0.5302, "step": 6951, "task_loss": 1.1977189779281616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2415362298488617, "epoch": 5.88, "learning_rate": 2.061707523245985e-05, "loss": 0.4493, "step": 6952, "task_loss": 0.3175063729286194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4160362482070923, "epoch": 5.88, "learning_rate": 2.0612848689771768e-05, "loss": 0.3986, "step": 6953, "task_loss": 1.3243874311447144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.224104106426239, "epoch": 5.88, "learning_rate": 2.0608622147083688e-05, "loss": 0.4484, "step": 6954, "task_loss": 0.07137873768806458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5895787477493286, "epoch": 5.88, "learning_rate": 2.0604395604395604e-05, "loss": 0.4326, "step": 6955, "task_loss": 0.6851025819778442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5832515954971313, "epoch": 5.88, "learning_rate": 2.0600169061707524e-05, "loss": 0.5185, "step": 6956, "task_loss": 0.5652205944061279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5279256105422974, "epoch": 5.88, "learning_rate": 2.0595942519019444e-05, "loss": 0.5575, "step": 6957, "task_loss": 0.8310483694076538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4228052794933319, "epoch": 5.88, "learning_rate": 2.0591715976331364e-05, "loss": 0.3808, "step": 6958, "task_loss": 0.7282111644744873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47618064284324646, "epoch": 5.88, "learning_rate": 2.058748943364328e-05, "loss": 0.5138, "step": 6959, "task_loss": 0.8880327343940735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39084964990615845, "epoch": 5.88, "learning_rate": 2.05832628909552e-05, "loss": 0.5013, "step": 6960, "task_loss": 1.4400535821914673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.70611572265625, "epoch": 5.88, "learning_rate": 2.057903634826712e-05, "loss": 0.5149, "step": 6961, "task_loss": 0.5581108927726746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33923280239105225, "epoch": 5.88, "learning_rate": 2.0574809805579036e-05, "loss": 0.4218, "step": 6962, "task_loss": 0.6265366077423096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.294910728931427, "epoch": 5.89, "learning_rate": 2.0570583262890956e-05, "loss": 0.4683, "step": 6963, "task_loss": 0.3881951868534088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.249252051115036, "epoch": 5.89, "learning_rate": 2.0566356720202875e-05, "loss": 0.3408, "step": 6964, "task_loss": 0.16938893496990204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34031033515930176, "epoch": 5.89, "learning_rate": 2.0562130177514795e-05, "loss": 0.5201, "step": 6965, "task_loss": 1.5841153860092163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2765214145183563, "epoch": 5.89, "learning_rate": 2.0557903634826715e-05, "loss": 0.4168, "step": 6966, "task_loss": 0.14913053810596466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6033191680908203, "epoch": 5.89, "learning_rate": 2.055367709213863e-05, "loss": 0.5496, "step": 6967, "task_loss": 0.7337641716003418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2983512580394745, "epoch": 5.89, "learning_rate": 2.054945054945055e-05, "loss": 0.3941, "step": 6968, "task_loss": 0.4973376989364624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5667001008987427, "epoch": 5.89, "learning_rate": 2.054522400676247e-05, "loss": 0.4624, "step": 6969, "task_loss": 1.239099383354187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15192481875419617, "epoch": 5.89, "learning_rate": 2.0540997464074387e-05, "loss": 0.2596, "step": 6970, "task_loss": 0.08259785920381546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3858349919319153, "epoch": 5.89, "learning_rate": 2.0536770921386307e-05, "loss": 0.4214, "step": 6971, "task_loss": 0.36624372005462646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5908135771751404, "epoch": 5.89, "learning_rate": 2.0532544378698226e-05, "loss": 0.4897, "step": 6972, "task_loss": 1.1913702487945557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3814309239387512, "epoch": 5.89, "learning_rate": 2.0528317836010143e-05, "loss": 0.3859, "step": 6973, "task_loss": 0.7360821962356567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.465462863445282, "epoch": 5.89, "learning_rate": 2.0524091293322066e-05, "loss": 0.5608, "step": 6974, "task_loss": 1.012315273284912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5127521753311157, "epoch": 5.9, "learning_rate": 2.0519864750633982e-05, "loss": 0.5553, "step": 6975, "task_loss": 0.7014459371566772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4636867046356201, "epoch": 5.9, "learning_rate": 2.0515638207945902e-05, "loss": 0.4922, "step": 6976, "task_loss": 0.5733063817024231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5170900821685791, "epoch": 5.9, "learning_rate": 2.051141166525782e-05, "loss": 0.6136, "step": 6977, "task_loss": 1.0354149341583252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41047024726867676, "epoch": 5.9, "learning_rate": 2.0507185122569738e-05, "loss": 0.6041, "step": 6978, "task_loss": 0.6598640084266663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1919073611497879, "epoch": 5.9, "learning_rate": 2.0502958579881658e-05, "loss": 0.5014, "step": 6979, "task_loss": 0.19847360253334045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49224385619163513, "epoch": 5.9, "learning_rate": 2.0498732037193578e-05, "loss": 0.4844, "step": 6980, "task_loss": 0.5532199740409851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5924396514892578, "epoch": 5.9, "learning_rate": 2.0494505494505494e-05, "loss": 0.4928, "step": 6981, "task_loss": 0.510155439376831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2171267867088318, "epoch": 5.9, "learning_rate": 2.0490278951817417e-05, "loss": 0.4683, "step": 6982, "task_loss": 0.4478294253349304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31343963742256165, "epoch": 5.9, "learning_rate": 2.0486052409129333e-05, "loss": 0.5461, "step": 6983, "task_loss": 0.28888171911239624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30224156379699707, "epoch": 5.9, "learning_rate": 2.048182586644125e-05, "loss": 0.4037, "step": 6984, "task_loss": 0.49089041352272034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33464598655700684, "epoch": 5.9, "learning_rate": 2.0477599323753173e-05, "loss": 0.44, "step": 6985, "task_loss": 0.26264408230781555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4295983910560608, "epoch": 5.9, "learning_rate": 2.047337278106509e-05, "loss": 0.3686, "step": 6986, "task_loss": 0.5043914914131165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29914841055870056, "epoch": 5.91, "learning_rate": 2.046914623837701e-05, "loss": 0.3713, "step": 6987, "task_loss": 0.22969037294387817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.665727972984314, "epoch": 5.91, "learning_rate": 2.046491969568893e-05, "loss": 0.5371, "step": 6988, "task_loss": 1.1917181015014648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3700661063194275, "epoch": 5.91, "learning_rate": 2.0460693153000845e-05, "loss": 0.3794, "step": 6989, "task_loss": 0.5996010899543762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48665523529052734, "epoch": 5.91, "learning_rate": 2.0456466610312765e-05, "loss": 0.4218, "step": 6990, "task_loss": 1.3244385719299316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4173729121685028, "epoch": 5.91, "learning_rate": 2.0452240067624684e-05, "loss": 0.3932, "step": 6991, "task_loss": 1.0321753025054932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5391624569892883, "epoch": 5.91, "learning_rate": 2.04480135249366e-05, "loss": 0.4709, "step": 6992, "task_loss": 0.3678254187107086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34529024362564087, "epoch": 5.91, "learning_rate": 2.0443786982248524e-05, "loss": 0.4229, "step": 6993, "task_loss": 0.4369000792503357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3824813961982727, "epoch": 5.91, "learning_rate": 2.043956043956044e-05, "loss": 0.4521, "step": 6994, "task_loss": 1.2072104215621948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6146224737167358, "epoch": 5.91, "learning_rate": 2.043533389687236e-05, "loss": 0.5808, "step": 6995, "task_loss": 1.2046154737472534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29365992546081543, "epoch": 5.91, "learning_rate": 2.043110735418428e-05, "loss": 0.4759, "step": 6996, "task_loss": 0.5661774277687073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5630391836166382, "epoch": 5.91, "learning_rate": 2.0426880811496196e-05, "loss": 0.4977, "step": 6997, "task_loss": 1.1067628860473633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5411484241485596, "epoch": 5.91, "learning_rate": 2.0422654268808116e-05, "loss": 0.5681, "step": 6998, "task_loss": 0.7602152824401855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5079545974731445, "epoch": 5.92, "learning_rate": 2.0418427726120036e-05, "loss": 0.5346, "step": 6999, "task_loss": 0.4370797872543335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45872044563293457, "epoch": 5.92, "learning_rate": 2.0414201183431952e-05, "loss": 0.4423, "step": 7000, "task_loss": 0.49306046962738037 }, { "epoch": 5.92, "eval_accuracy": 0.9091881188118812, "eval_loss": 0.30122703313827515, "eval_runtime": 229.1684, "eval_samples_per_second": 110.181, "eval_steps_per_second": 0.864, "step": 7000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38473546504974365, "epoch": 5.92, "learning_rate": 2.0409974640743872e-05, "loss": 0.5171, "step": 7001, "task_loss": 0.6840525269508362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5666130185127258, "epoch": 5.92, "learning_rate": 2.040574809805579e-05, "loss": 0.5257, "step": 7002, "task_loss": 0.4802851974964142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36271658539772034, "epoch": 5.92, "learning_rate": 2.040152155536771e-05, "loss": 0.4802, "step": 7003, "task_loss": 0.7734671235084534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5573059320449829, "epoch": 5.92, "learning_rate": 2.0397295012679628e-05, "loss": 0.511, "step": 7004, "task_loss": 0.5650498270988464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2098025381565094, "epoch": 5.92, "learning_rate": 2.0393068469991547e-05, "loss": 0.5231, "step": 7005, "task_loss": 0.08263315260410309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2708004415035248, "epoch": 5.92, "learning_rate": 2.0388841927303467e-05, "loss": 0.4776, "step": 7006, "task_loss": 0.03214747831225395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7136062383651733, "epoch": 5.92, "learning_rate": 2.0384615384615387e-05, "loss": 0.5654, "step": 7007, "task_loss": 1.1165952682495117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36361658573150635, "epoch": 5.92, "learning_rate": 2.0380388841927303e-05, "loss": 0.5594, "step": 7008, "task_loss": 0.8413437008857727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1874292641878128, "epoch": 5.92, "learning_rate": 2.0376162299239223e-05, "loss": 0.3775, "step": 7009, "task_loss": 0.44430458545684814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.037595510482788, "epoch": 5.93, "learning_rate": 2.0371935756551143e-05, "loss": 0.534, "step": 7010, "task_loss": 1.4111241102218628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3515136241912842, "epoch": 5.93, "learning_rate": 2.0367709213863062e-05, "loss": 0.3286, "step": 7011, "task_loss": 0.40704452991485596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6027913093566895, "epoch": 5.93, "learning_rate": 2.036348267117498e-05, "loss": 0.5005, "step": 7012, "task_loss": 0.6376664638519287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4948940873146057, "epoch": 5.93, "learning_rate": 2.03592561284869e-05, "loss": 0.5056, "step": 7013, "task_loss": 0.7439824342727661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5963187217712402, "epoch": 5.93, "learning_rate": 2.0355029585798818e-05, "loss": 0.4659, "step": 7014, "task_loss": 0.7507460117340088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7237182855606079, "epoch": 5.93, "learning_rate": 2.0350803043110734e-05, "loss": 0.6112, "step": 7015, "task_loss": 0.6702473163604736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29223304986953735, "epoch": 5.93, "learning_rate": 2.0346576500422658e-05, "loss": 0.4424, "step": 7016, "task_loss": 0.7347798347473145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46810343861579895, "epoch": 5.93, "learning_rate": 2.0342349957734574e-05, "loss": 0.4677, "step": 7017, "task_loss": 0.37034061551094055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6313290596008301, "epoch": 5.93, "learning_rate": 2.0338123415046494e-05, "loss": 0.5498, "step": 7018, "task_loss": 0.4862879812717438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36205318570137024, "epoch": 5.93, "learning_rate": 2.0333896872358413e-05, "loss": 0.419, "step": 7019, "task_loss": 0.7600081562995911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4158157706260681, "epoch": 5.93, "learning_rate": 2.032967032967033e-05, "loss": 0.3667, "step": 7020, "task_loss": 0.4066627025604248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4933263063430786, "epoch": 5.93, "learning_rate": 2.032544378698225e-05, "loss": 0.4776, "step": 7021, "task_loss": 0.3642802834510803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3373827338218689, "epoch": 5.94, "learning_rate": 2.032121724429417e-05, "loss": 0.4252, "step": 7022, "task_loss": 0.8688920736312866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3612859845161438, "epoch": 5.94, "learning_rate": 2.0316990701606086e-05, "loss": 0.4093, "step": 7023, "task_loss": 0.61452317237854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28338301181793213, "epoch": 5.94, "learning_rate": 2.031276415891801e-05, "loss": 0.3613, "step": 7024, "task_loss": 0.6012794971466064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43837082386016846, "epoch": 5.94, "learning_rate": 2.0308537616229925e-05, "loss": 0.4128, "step": 7025, "task_loss": 0.5211491584777832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45048001408576965, "epoch": 5.94, "learning_rate": 2.030431107354184e-05, "loss": 0.4912, "step": 7026, "task_loss": 0.1650909036397934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5176198482513428, "epoch": 5.94, "learning_rate": 2.0300084530853765e-05, "loss": 0.37, "step": 7027, "task_loss": 1.0422096252441406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43670836091041565, "epoch": 5.94, "learning_rate": 2.029585798816568e-05, "loss": 0.4958, "step": 7028, "task_loss": 0.7880675196647644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8128395080566406, "epoch": 5.94, "learning_rate": 2.02916314454776e-05, "loss": 0.529, "step": 7029, "task_loss": 1.2211318016052246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4492298364639282, "epoch": 5.94, "learning_rate": 2.028740490278952e-05, "loss": 0.4517, "step": 7030, "task_loss": 0.5898701548576355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3042801022529602, "epoch": 5.94, "learning_rate": 2.0283178360101437e-05, "loss": 0.4507, "step": 7031, "task_loss": 0.8023127913475037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5930072069168091, "epoch": 5.94, "learning_rate": 2.0278951817413356e-05, "loss": 0.4801, "step": 7032, "task_loss": 1.1964970827102661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5841115713119507, "epoch": 5.94, "learning_rate": 2.0274725274725276e-05, "loss": 0.5234, "step": 7033, "task_loss": 1.2644555568695068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.528667688369751, "epoch": 5.95, "learning_rate": 2.0270498732037193e-05, "loss": 0.4742, "step": 7034, "task_loss": 0.5575354099273682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5134127736091614, "epoch": 5.95, "learning_rate": 2.0266272189349116e-05, "loss": 0.4775, "step": 7035, "task_loss": 0.23523935675621033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47749000787734985, "epoch": 5.95, "learning_rate": 2.0262045646661032e-05, "loss": 0.5308, "step": 7036, "task_loss": 0.5624517798423767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5227967500686646, "epoch": 5.95, "learning_rate": 2.025781910397295e-05, "loss": 0.5867, "step": 7037, "task_loss": 0.32910606265068054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3845481276512146, "epoch": 5.95, "learning_rate": 2.025359256128487e-05, "loss": 0.4294, "step": 7038, "task_loss": 0.6235036849975586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5552660226821899, "epoch": 5.95, "learning_rate": 2.0249366018596788e-05, "loss": 0.5164, "step": 7039, "task_loss": 1.9431121349334717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4469095468521118, "epoch": 5.95, "learning_rate": 2.0245139475908708e-05, "loss": 0.4224, "step": 7040, "task_loss": 0.23226583003997803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5176308155059814, "epoch": 5.95, "learning_rate": 2.0240912933220627e-05, "loss": 0.4308, "step": 7041, "task_loss": 0.2558535933494568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5335114598274231, "epoch": 5.95, "learning_rate": 2.0236686390532544e-05, "loss": 0.4012, "step": 7042, "task_loss": 0.44462838768959045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47712442278862, "epoch": 5.95, "learning_rate": 2.0232459847844463e-05, "loss": 0.3853, "step": 7043, "task_loss": 1.314573884010315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5327378511428833, "epoch": 5.95, "learning_rate": 2.0228233305156383e-05, "loss": 0.6413, "step": 7044, "task_loss": 1.06952965259552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.272204726934433, "epoch": 5.95, "learning_rate": 2.0224006762468303e-05, "loss": 0.398, "step": 7045, "task_loss": 0.6054376363754272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4695577323436737, "epoch": 5.96, "learning_rate": 2.0219780219780223e-05, "loss": 0.3974, "step": 7046, "task_loss": 0.14660142362117767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4648245573043823, "epoch": 5.96, "learning_rate": 2.021555367709214e-05, "loss": 0.5524, "step": 7047, "task_loss": 0.7094641923904419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3122267723083496, "epoch": 5.96, "learning_rate": 2.021132713440406e-05, "loss": 0.3384, "step": 7048, "task_loss": 1.0159333944320679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37727344036102295, "epoch": 5.96, "learning_rate": 2.020710059171598e-05, "loss": 0.4847, "step": 7049, "task_loss": 0.6171429753303528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4436185359954834, "epoch": 5.96, "learning_rate": 2.0202874049027895e-05, "loss": 0.3532, "step": 7050, "task_loss": 1.3608589172363281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5559684038162231, "epoch": 5.96, "learning_rate": 2.0198647506339815e-05, "loss": 0.4823, "step": 7051, "task_loss": 0.3221512734889984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2834300994873047, "epoch": 5.96, "learning_rate": 2.0194420963651734e-05, "loss": 0.4014, "step": 7052, "task_loss": 0.995764434337616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6256246566772461, "epoch": 5.96, "learning_rate": 2.0190194420963654e-05, "loss": 0.53, "step": 7053, "task_loss": 0.5288268327713013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5749763250350952, "epoch": 5.96, "learning_rate": 2.018596787827557e-05, "loss": 0.4529, "step": 7054, "task_loss": 1.5266227722167969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.513207197189331, "epoch": 5.96, "learning_rate": 2.018174133558749e-05, "loss": 0.6147, "step": 7055, "task_loss": 0.6271253228187561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41158199310302734, "epoch": 5.96, "learning_rate": 2.017751479289941e-05, "loss": 0.46, "step": 7056, "task_loss": 0.3153323829174042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3458254337310791, "epoch": 5.96, "learning_rate": 2.017328825021133e-05, "loss": 0.4521, "step": 7057, "task_loss": 0.6203638315200806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31062930822372437, "epoch": 5.97, "learning_rate": 2.0169061707523246e-05, "loss": 0.3472, "step": 7058, "task_loss": 0.4665345549583435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3848392069339752, "epoch": 5.97, "learning_rate": 2.0164835164835166e-05, "loss": 0.508, "step": 7059, "task_loss": 0.5620155334472656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4599146246910095, "epoch": 5.97, "learning_rate": 2.0160608622147085e-05, "loss": 0.4409, "step": 7060, "task_loss": 0.867586076259613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42547208070755005, "epoch": 5.97, "learning_rate": 2.0156382079459005e-05, "loss": 0.5965, "step": 7061, "task_loss": 0.8307308554649353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35151779651641846, "epoch": 5.97, "learning_rate": 2.015215553677092e-05, "loss": 0.51, "step": 7062, "task_loss": 0.17829468846321106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24665094912052155, "epoch": 5.97, "learning_rate": 2.014792899408284e-05, "loss": 0.396, "step": 7063, "task_loss": 0.21072585880756378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3609168529510498, "epoch": 5.97, "learning_rate": 2.014370245139476e-05, "loss": 0.376, "step": 7064, "task_loss": 0.3609398305416107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.611214280128479, "epoch": 5.97, "learning_rate": 2.0139475908706677e-05, "loss": 0.629, "step": 7065, "task_loss": 1.013014316558838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3529215455055237, "epoch": 5.97, "learning_rate": 2.0135249366018597e-05, "loss": 0.4472, "step": 7066, "task_loss": 0.13304711878299713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8130069375038147, "epoch": 5.97, "learning_rate": 2.0131022823330517e-05, "loss": 0.4689, "step": 7067, "task_loss": 0.9110226035118103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4751279652118683, "epoch": 5.97, "learning_rate": 2.0126796280642433e-05, "loss": 0.3487, "step": 7068, "task_loss": 0.8028808236122131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41768959164619446, "epoch": 5.97, "learning_rate": 2.0122569737954356e-05, "loss": 0.47, "step": 7069, "task_loss": 0.8124250173568726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2917250394821167, "epoch": 5.98, "learning_rate": 2.0118343195266273e-05, "loss": 0.4199, "step": 7070, "task_loss": 0.29982373118400574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7308906316757202, "epoch": 5.98, "learning_rate": 2.0114116652578192e-05, "loss": 0.4934, "step": 7071, "task_loss": 1.0027003288269043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26983407139778137, "epoch": 5.98, "learning_rate": 2.0109890109890112e-05, "loss": 0.3916, "step": 7072, "task_loss": 0.830588698387146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.332949161529541, "epoch": 5.98, "learning_rate": 2.010566356720203e-05, "loss": 0.4469, "step": 7073, "task_loss": 0.4459330439567566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28934359550476074, "epoch": 5.98, "learning_rate": 2.0101437024513948e-05, "loss": 0.3807, "step": 7074, "task_loss": 0.2873598039150238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28923001885414124, "epoch": 5.98, "learning_rate": 2.0097210481825868e-05, "loss": 0.4862, "step": 7075, "task_loss": 0.894254207611084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5095264315605164, "epoch": 5.98, "learning_rate": 2.0092983939137784e-05, "loss": 0.3617, "step": 7076, "task_loss": 0.4393921196460724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26413285732269287, "epoch": 5.98, "learning_rate": 2.0088757396449707e-05, "loss": 0.3413, "step": 7077, "task_loss": 0.42026636004447937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2640991508960724, "epoch": 5.98, "learning_rate": 2.0084530853761624e-05, "loss": 0.3601, "step": 7078, "task_loss": 0.1901407092809677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3372025191783905, "epoch": 5.98, "learning_rate": 2.008030431107354e-05, "loss": 0.4201, "step": 7079, "task_loss": 0.723630964756012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3172477185726166, "epoch": 5.98, "learning_rate": 2.0076077768385463e-05, "loss": 0.5028, "step": 7080, "task_loss": 0.7342675924301147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44924086332321167, "epoch": 5.99, "learning_rate": 2.007185122569738e-05, "loss": 0.4042, "step": 7081, "task_loss": 0.4796159565448761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3566740155220032, "epoch": 5.99, "learning_rate": 2.00676246830093e-05, "loss": 0.2899, "step": 7082, "task_loss": 0.8122480511665344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5804363489151001, "epoch": 5.99, "learning_rate": 2.006339814032122e-05, "loss": 0.5362, "step": 7083, "task_loss": 0.8325528502464294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.477544903755188, "epoch": 5.99, "learning_rate": 2.0059171597633135e-05, "loss": 0.4969, "step": 7084, "task_loss": 0.5129575133323669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3535222113132477, "epoch": 5.99, "learning_rate": 2.0054945054945055e-05, "loss": 0.4177, "step": 7085, "task_loss": 1.0047675371170044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20663830637931824, "epoch": 5.99, "learning_rate": 2.0050718512256975e-05, "loss": 0.3591, "step": 7086, "task_loss": 0.7863340377807617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5445507168769836, "epoch": 5.99, "learning_rate": 2.004649196956889e-05, "loss": 0.4676, "step": 7087, "task_loss": 0.558000385761261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23296119272708893, "epoch": 5.99, "learning_rate": 2.0042265426880814e-05, "loss": 0.3041, "step": 7088, "task_loss": 0.3813418447971344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6151244044303894, "epoch": 5.99, "learning_rate": 2.003803888419273e-05, "loss": 0.587, "step": 7089, "task_loss": 1.5875651836395264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45662006735801697, "epoch": 5.99, "learning_rate": 2.003381234150465e-05, "loss": 0.3406, "step": 7090, "task_loss": 0.3505646884441376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5623189210891724, "epoch": 5.99, "learning_rate": 2.002958579881657e-05, "loss": 0.4785, "step": 7091, "task_loss": 0.8563574552536011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5079002380371094, "epoch": 5.99, "learning_rate": 2.0025359256128487e-05, "loss": 0.522, "step": 7092, "task_loss": 1.0612787008285522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5889732241630554, "epoch": 6.0, "learning_rate": 2.0021132713440406e-05, "loss": 0.4594, "step": 7093, "task_loss": 0.4863797724246979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7369677424430847, "epoch": 6.0, "learning_rate": 2.0016906170752326e-05, "loss": 0.5509, "step": 7094, "task_loss": 0.9107285737991333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41503313183784485, "epoch": 6.0, "learning_rate": 2.0012679628064242e-05, "loss": 0.7397, "step": 7095, "task_loss": 0.2903255224227905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.340520977973938, "epoch": 6.0, "learning_rate": 2.0008453085376162e-05, "loss": 0.4755, "step": 7096, "task_loss": 0.7630773186683655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48844391107559204, "epoch": 6.0, "learning_rate": 2.0004226542688082e-05, "loss": 0.5623, "step": 7097, "task_loss": 0.7638906240463257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.343919038772583, "epoch": 6.0, "learning_rate": 2e-05, "loss": 0.4544, "step": 7098, "task_loss": 0.7777069211006165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5701781511306763, "epoch": 6.0, "learning_rate": 1.999577345731192e-05, "loss": 0.9276, "step": 7099, "task_loss": 0.3877008259296417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9569276571273804, "epoch": 6.0, "learning_rate": 1.9991546914623838e-05, "loss": 0.509, "step": 7100, "task_loss": 0.7410070896148682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3484238386154175, "epoch": 6.0, "learning_rate": 1.9987320371935757e-05, "loss": 0.4693, "step": 7101, "task_loss": 0.7160006165504456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35248348116874695, "epoch": 6.0, "learning_rate": 1.9983093829247677e-05, "loss": 0.5155, "step": 7102, "task_loss": 0.5324273109436035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3718428313732147, "epoch": 6.0, "learning_rate": 1.9978867286559594e-05, "loss": 0.4785, "step": 7103, "task_loss": 0.43350714445114136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4625117778778076, "epoch": 6.01, "learning_rate": 1.9974640743871513e-05, "loss": 0.457, "step": 7104, "task_loss": 0.6774790287017822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.424936980009079, "epoch": 6.01, "learning_rate": 1.9970414201183433e-05, "loss": 0.4229, "step": 7105, "task_loss": 0.3508889675140381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49180856347084045, "epoch": 6.01, "learning_rate": 1.9966187658495353e-05, "loss": 0.4535, "step": 7106, "task_loss": 0.31365376710891724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7591346502304077, "epoch": 6.01, "learning_rate": 1.996196111580727e-05, "loss": 0.5221, "step": 7107, "task_loss": 0.6466817259788513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37281787395477295, "epoch": 6.01, "learning_rate": 1.995773457311919e-05, "loss": 0.4229, "step": 7108, "task_loss": 0.39326924085617065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5049030780792236, "epoch": 6.01, "learning_rate": 1.995350803043111e-05, "loss": 0.4458, "step": 7109, "task_loss": 1.6908066272735596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4952152669429779, "epoch": 6.01, "learning_rate": 1.994928148774303e-05, "loss": 0.4272, "step": 7110, "task_loss": 0.126212015748024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6992781162261963, "epoch": 6.01, "learning_rate": 1.9945054945054948e-05, "loss": 0.6332, "step": 7111, "task_loss": 0.47483015060424805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31548428535461426, "epoch": 6.01, "learning_rate": 1.9940828402366864e-05, "loss": 0.3622, "step": 7112, "task_loss": 0.16030895709991455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.785224437713623, "epoch": 6.01, "learning_rate": 1.9936601859678784e-05, "loss": 0.5267, "step": 7113, "task_loss": 0.9471098780632019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3346899747848511, "epoch": 6.01, "learning_rate": 1.9932375316990704e-05, "loss": 0.4288, "step": 7114, "task_loss": 0.4941166341304779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1805504709482193, "epoch": 6.01, "learning_rate": 1.992814877430262e-05, "loss": 0.3375, "step": 7115, "task_loss": 0.04331701248884201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.506712019443512, "epoch": 6.02, "learning_rate": 1.992392223161454e-05, "loss": 0.4503, "step": 7116, "task_loss": 0.24130764603614807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3008997440338135, "epoch": 6.02, "learning_rate": 1.991969568892646e-05, "loss": 0.3175, "step": 7117, "task_loss": 0.533882200717926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28145119547843933, "epoch": 6.02, "learning_rate": 1.9915469146238376e-05, "loss": 0.4303, "step": 7118, "task_loss": 0.15442383289337158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6824292540550232, "epoch": 6.02, "learning_rate": 1.99112426035503e-05, "loss": 0.4483, "step": 7119, "task_loss": 0.5613414645195007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2743123769760132, "epoch": 6.02, "learning_rate": 1.9907016060862216e-05, "loss": 0.3646, "step": 7120, "task_loss": 0.6803524494171143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4359835386276245, "epoch": 6.02, "learning_rate": 1.9902789518174135e-05, "loss": 0.366, "step": 7121, "task_loss": 0.5511413216590881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5660382509231567, "epoch": 6.02, "learning_rate": 1.9898562975486055e-05, "loss": 0.3876, "step": 7122, "task_loss": 0.9108102917671204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33020663261413574, "epoch": 6.02, "learning_rate": 1.989433643279797e-05, "loss": 0.4916, "step": 7123, "task_loss": 0.6552597284317017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4297211766242981, "epoch": 6.02, "learning_rate": 1.989010989010989e-05, "loss": 0.5881, "step": 7124, "task_loss": 0.6215572953224182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35129514336586, "epoch": 6.02, "learning_rate": 1.988588334742181e-05, "loss": 0.3899, "step": 7125, "task_loss": 0.6472136974334717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.402021586894989, "epoch": 6.02, "learning_rate": 1.9881656804733727e-05, "loss": 0.5175, "step": 7126, "task_loss": 0.9622039794921875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3925040662288666, "epoch": 6.02, "learning_rate": 1.987743026204565e-05, "loss": 0.3536, "step": 7127, "task_loss": 0.4665308892726898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5384204387664795, "epoch": 6.03, "learning_rate": 1.9873203719357567e-05, "loss": 0.4926, "step": 7128, "task_loss": 0.5490915775299072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43364787101745605, "epoch": 6.03, "learning_rate": 1.9868977176669483e-05, "loss": 0.6371, "step": 7129, "task_loss": 0.8415454030036926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5074772834777832, "epoch": 6.03, "learning_rate": 1.9864750633981406e-05, "loss": 0.5866, "step": 7130, "task_loss": 0.6001825928688049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38212740421295166, "epoch": 6.03, "learning_rate": 1.9860524091293323e-05, "loss": 0.632, "step": 7131, "task_loss": 0.3304152190685272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4040065109729767, "epoch": 6.03, "learning_rate": 1.985629754860524e-05, "loss": 0.3547, "step": 7132, "task_loss": 0.477699339389801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20576882362365723, "epoch": 6.03, "learning_rate": 1.9852071005917162e-05, "loss": 0.5589, "step": 7133, "task_loss": 0.5750070214271545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42077529430389404, "epoch": 6.03, "learning_rate": 1.984784446322908e-05, "loss": 0.3648, "step": 7134, "task_loss": 0.692491888999939 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4043070077896118, "epoch": 6.03, "learning_rate": 1.9843617920540998e-05, "loss": 0.3581, "step": 7135, "task_loss": 0.6107720136642456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4472653567790985, "epoch": 6.03, "learning_rate": 1.9839391377852918e-05, "loss": 0.3986, "step": 7136, "task_loss": 0.48385530710220337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4865350127220154, "epoch": 6.03, "learning_rate": 1.9835164835164834e-05, "loss": 0.4606, "step": 7137, "task_loss": 0.9772742986679077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.394976407289505, "epoch": 6.03, "learning_rate": 1.9830938292476757e-05, "loss": 0.4254, "step": 7138, "task_loss": 0.48060402274131775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26181313395500183, "epoch": 6.03, "learning_rate": 1.9826711749788674e-05, "loss": 0.2869, "step": 7139, "task_loss": 0.4717243015766144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3027476668357849, "epoch": 6.04, "learning_rate": 1.9822485207100593e-05, "loss": 0.5421, "step": 7140, "task_loss": 0.33248940110206604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4299558997154236, "epoch": 6.04, "learning_rate": 1.9818258664412513e-05, "loss": 0.4926, "step": 7141, "task_loss": 0.3326123058795929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5699581503868103, "epoch": 6.04, "learning_rate": 1.981403212172443e-05, "loss": 0.4683, "step": 7142, "task_loss": 0.4445810317993164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6149921417236328, "epoch": 6.04, "learning_rate": 1.980980557903635e-05, "loss": 0.4229, "step": 7143, "task_loss": 0.4773853123188019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34407907724380493, "epoch": 6.04, "learning_rate": 1.980557903634827e-05, "loss": 0.3924, "step": 7144, "task_loss": 0.4509623944759369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6502318978309631, "epoch": 6.04, "learning_rate": 1.9801352493660185e-05, "loss": 0.5104, "step": 7145, "task_loss": 1.2366260290145874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41682109236717224, "epoch": 6.04, "learning_rate": 1.9797125950972105e-05, "loss": 0.5311, "step": 7146, "task_loss": 0.7743030786514282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37789708375930786, "epoch": 6.04, "learning_rate": 1.9792899408284025e-05, "loss": 0.3977, "step": 7147, "task_loss": 0.9515600800514221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2106633186340332, "epoch": 6.04, "learning_rate": 1.9788672865595945e-05, "loss": 0.3723, "step": 7148, "task_loss": 0.24695168435573578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4226333498954773, "epoch": 6.04, "learning_rate": 1.978444632290786e-05, "loss": 0.4265, "step": 7149, "task_loss": 0.8468390107154846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41120079159736633, "epoch": 6.04, "learning_rate": 1.978021978021978e-05, "loss": 0.4926, "step": 7150, "task_loss": 0.6580773591995239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2757922112941742, "epoch": 6.04, "learning_rate": 1.97759932375317e-05, "loss": 0.3017, "step": 7151, "task_loss": 0.3530746102333069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5025150775909424, "epoch": 6.05, "learning_rate": 1.977176669484362e-05, "loss": 0.4684, "step": 7152, "task_loss": 0.8743808269500732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.322287917137146, "epoch": 6.05, "learning_rate": 1.9767540152155536e-05, "loss": 0.413, "step": 7153, "task_loss": 0.08523182570934296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28315162658691406, "epoch": 6.05, "learning_rate": 1.9763313609467456e-05, "loss": 0.4788, "step": 7154, "task_loss": 0.17469990253448486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35658466815948486, "epoch": 6.05, "learning_rate": 1.9759087066779376e-05, "loss": 0.4789, "step": 7155, "task_loss": 0.09968946129083633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5890517234802246, "epoch": 6.05, "learning_rate": 1.9754860524091296e-05, "loss": 0.5579, "step": 7156, "task_loss": 0.9114635586738586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25344109535217285, "epoch": 6.05, "learning_rate": 1.9750633981403212e-05, "loss": 0.4819, "step": 7157, "task_loss": 0.359023779630661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2757277190685272, "epoch": 6.05, "learning_rate": 1.9746407438715132e-05, "loss": 0.3359, "step": 7158, "task_loss": 0.3692559599876404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49395227432250977, "epoch": 6.05, "learning_rate": 1.974218089602705e-05, "loss": 0.3932, "step": 7159, "task_loss": 0.65919029712677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2909431755542755, "epoch": 6.05, "learning_rate": 1.9737954353338968e-05, "loss": 0.5143, "step": 7160, "task_loss": 0.20295463502407074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39850038290023804, "epoch": 6.05, "learning_rate": 1.9733727810650888e-05, "loss": 0.4272, "step": 7161, "task_loss": 0.7382453680038452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4145810604095459, "epoch": 6.05, "learning_rate": 1.9729501267962807e-05, "loss": 0.4424, "step": 7162, "task_loss": 0.27712002396583557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3964119553565979, "epoch": 6.05, "learning_rate": 1.9725274725274727e-05, "loss": 0.3528, "step": 7163, "task_loss": 0.3206097185611725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33977770805358887, "epoch": 6.06, "learning_rate": 1.9721048182586647e-05, "loss": 0.4619, "step": 7164, "task_loss": 0.6170482039451599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6539547443389893, "epoch": 6.06, "learning_rate": 1.9716821639898563e-05, "loss": 0.588, "step": 7165, "task_loss": 0.7581604719161987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43939924240112305, "epoch": 6.06, "learning_rate": 1.9712595097210483e-05, "loss": 0.3468, "step": 7166, "task_loss": 0.10226503759622574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2961534559726715, "epoch": 6.06, "learning_rate": 1.9708368554522403e-05, "loss": 0.5043, "step": 7167, "task_loss": 0.1020750179886818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36550116539001465, "epoch": 6.06, "learning_rate": 1.970414201183432e-05, "loss": 0.386, "step": 7168, "task_loss": 0.14622335135936737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3229232430458069, "epoch": 6.06, "learning_rate": 1.9699915469146242e-05, "loss": 0.4696, "step": 7169, "task_loss": 0.4402432441711426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3905911445617676, "epoch": 6.06, "learning_rate": 1.969568892645816e-05, "loss": 0.4543, "step": 7170, "task_loss": 0.383543461561203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4533047676086426, "epoch": 6.06, "learning_rate": 1.9691462383770075e-05, "loss": 0.4283, "step": 7171, "task_loss": 0.8107605576515198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3723984360694885, "epoch": 6.06, "learning_rate": 1.9687235841081998e-05, "loss": 0.4786, "step": 7172, "task_loss": 0.16828909516334534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36962050199508667, "epoch": 6.06, "learning_rate": 1.9683009298393914e-05, "loss": 0.6587, "step": 7173, "task_loss": 1.2329379320144653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29564645886421204, "epoch": 6.06, "learning_rate": 1.9678782755705834e-05, "loss": 0.5681, "step": 7174, "task_loss": 0.7224622368812561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.538479208946228, "epoch": 6.07, "learning_rate": 1.9674556213017754e-05, "loss": 0.4559, "step": 7175, "task_loss": 0.6971434950828552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5118979215621948, "epoch": 6.07, "learning_rate": 1.967032967032967e-05, "loss": 0.4474, "step": 7176, "task_loss": 0.14605680108070374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.322388231754303, "epoch": 6.07, "learning_rate": 1.966610312764159e-05, "loss": 0.4133, "step": 7177, "task_loss": 0.06437411904335022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5000052452087402, "epoch": 6.07, "learning_rate": 1.966187658495351e-05, "loss": 0.4551, "step": 7178, "task_loss": 0.3907923698425293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3304354250431061, "epoch": 6.07, "learning_rate": 1.9657650042265426e-05, "loss": 0.3622, "step": 7179, "task_loss": 0.8238796591758728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5665068030357361, "epoch": 6.07, "learning_rate": 1.965342349957735e-05, "loss": 0.5243, "step": 7180, "task_loss": 1.3886237144470215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.416795939207077, "epoch": 6.07, "learning_rate": 1.9649196956889265e-05, "loss": 0.4367, "step": 7181, "task_loss": 0.9146501421928406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5946551561355591, "epoch": 6.07, "learning_rate": 1.9644970414201182e-05, "loss": 0.4145, "step": 7182, "task_loss": 0.7932815551757812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24415510892868042, "epoch": 6.07, "learning_rate": 1.9640743871513105e-05, "loss": 0.3678, "step": 7183, "task_loss": 0.05898415297269821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.664276123046875, "epoch": 6.07, "learning_rate": 1.963651732882502e-05, "loss": 0.6169, "step": 7184, "task_loss": 0.16563449800014496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4112812280654907, "epoch": 6.07, "learning_rate": 1.963229078613694e-05, "loss": 0.4604, "step": 7185, "task_loss": 0.7957295179367065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3325442969799042, "epoch": 6.07, "learning_rate": 1.962806424344886e-05, "loss": 0.3323, "step": 7186, "task_loss": 0.45132017135620117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3425899147987366, "epoch": 6.08, "learning_rate": 1.9623837700760777e-05, "loss": 0.4792, "step": 7187, "task_loss": 0.47369828820228577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5338979363441467, "epoch": 6.08, "learning_rate": 1.9619611158072697e-05, "loss": 0.4862, "step": 7188, "task_loss": 1.300980806350708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37640833854675293, "epoch": 6.08, "learning_rate": 1.9615384615384617e-05, "loss": 0.4266, "step": 7189, "task_loss": 0.8976707458496094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6532567739486694, "epoch": 6.08, "learning_rate": 1.9611158072696533e-05, "loss": 0.4854, "step": 7190, "task_loss": 1.5067567825317383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3923265337944031, "epoch": 6.08, "learning_rate": 1.9606931530008456e-05, "loss": 0.4297, "step": 7191, "task_loss": 0.5677223205566406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26764655113220215, "epoch": 6.08, "learning_rate": 1.9602704987320372e-05, "loss": 0.3783, "step": 7192, "task_loss": 0.49393540620803833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6408248543739319, "epoch": 6.08, "learning_rate": 1.9598478444632292e-05, "loss": 0.4839, "step": 7193, "task_loss": 1.2655560970306396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3643551170825958, "epoch": 6.08, "learning_rate": 1.9594251901944212e-05, "loss": 0.4415, "step": 7194, "task_loss": 0.31174948811531067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4293809235095978, "epoch": 6.08, "learning_rate": 1.9590025359256128e-05, "loss": 0.3371, "step": 7195, "task_loss": 0.3325020372867584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4124053120613098, "epoch": 6.08, "learning_rate": 1.9585798816568048e-05, "loss": 0.41, "step": 7196, "task_loss": 1.1366801261901855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5854707956314087, "epoch": 6.08, "learning_rate": 1.9581572273879968e-05, "loss": 0.518, "step": 7197, "task_loss": 0.6232240200042725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37119153141975403, "epoch": 6.08, "learning_rate": 1.9577345731191887e-05, "loss": 0.3482, "step": 7198, "task_loss": 0.27896738052368164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3135519325733185, "epoch": 6.09, "learning_rate": 1.9573119188503804e-05, "loss": 0.3737, "step": 7199, "task_loss": 0.7761308550834656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.269506573677063, "epoch": 6.09, "learning_rate": 1.9568892645815723e-05, "loss": 0.4045, "step": 7200, "task_loss": 0.5865101218223572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4285007119178772, "epoch": 6.09, "learning_rate": 1.9564666103127643e-05, "loss": 0.5723, "step": 7201, "task_loss": 0.15124337375164032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3308812081813812, "epoch": 6.09, "learning_rate": 1.956043956043956e-05, "loss": 0.3671, "step": 7202, "task_loss": 0.13573087751865387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2836777865886688, "epoch": 6.09, "learning_rate": 1.955621301775148e-05, "loss": 0.4015, "step": 7203, "task_loss": 0.7070236206054688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25491344928741455, "epoch": 6.09, "learning_rate": 1.95519864750634e-05, "loss": 0.4813, "step": 7204, "task_loss": 0.8665040135383606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4460285007953644, "epoch": 6.09, "learning_rate": 1.954775993237532e-05, "loss": 0.5672, "step": 7205, "task_loss": 0.36557960510253906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5708189010620117, "epoch": 6.09, "learning_rate": 1.954353338968724e-05, "loss": 0.5532, "step": 7206, "task_loss": 1.1132572889328003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43596431612968445, "epoch": 6.09, "learning_rate": 1.9539306846999155e-05, "loss": 0.4421, "step": 7207, "task_loss": 0.32812559604644775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2519352436065674, "epoch": 6.09, "learning_rate": 1.9535080304311075e-05, "loss": 0.4791, "step": 7208, "task_loss": 0.7005215287208557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3390798568725586, "epoch": 6.09, "learning_rate": 1.9530853761622994e-05, "loss": 0.3988, "step": 7209, "task_loss": 0.22908218204975128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3662128448486328, "epoch": 6.09, "learning_rate": 1.952662721893491e-05, "loss": 0.3944, "step": 7210, "task_loss": 0.5346511006355286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5354434251785278, "epoch": 6.1, "learning_rate": 1.952240067624683e-05, "loss": 0.5107, "step": 7211, "task_loss": 1.00706148147583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47801145911216736, "epoch": 6.1, "learning_rate": 1.951817413355875e-05, "loss": 0.472, "step": 7212, "task_loss": 0.16307665407657623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3968490958213806, "epoch": 6.1, "learning_rate": 1.9513947590870667e-05, "loss": 0.4674, "step": 7213, "task_loss": 0.4874959886074066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4203256368637085, "epoch": 6.1, "learning_rate": 1.950972104818259e-05, "loss": 0.4924, "step": 7214, "task_loss": 0.3265974223613739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38115596771240234, "epoch": 6.1, "learning_rate": 1.9505494505494506e-05, "loss": 0.4146, "step": 7215, "task_loss": 0.4111417531967163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45491865277290344, "epoch": 6.1, "learning_rate": 1.9501267962806426e-05, "loss": 0.4446, "step": 7216, "task_loss": 0.6150575876235962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7031038999557495, "epoch": 6.1, "learning_rate": 1.9497041420118345e-05, "loss": 0.5642, "step": 7217, "task_loss": 1.0282368659973145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7475574612617493, "epoch": 6.1, "learning_rate": 1.9492814877430262e-05, "loss": 0.4913, "step": 7218, "task_loss": 0.6680750846862793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3313060998916626, "epoch": 6.1, "learning_rate": 1.948858833474218e-05, "loss": 0.3971, "step": 7219, "task_loss": 1.079016923904419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4855874478816986, "epoch": 6.1, "learning_rate": 1.94843617920541e-05, "loss": 0.4845, "step": 7220, "task_loss": 1.01728355884552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38026174902915955, "epoch": 6.1, "learning_rate": 1.9480135249366018e-05, "loss": 0.4577, "step": 7221, "task_loss": 0.5155710577964783 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3255561888217926, "epoch": 6.1, "learning_rate": 1.947590870667794e-05, "loss": 0.4267, "step": 7222, "task_loss": 0.11263782531023026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4074364900588989, "epoch": 6.11, "learning_rate": 1.9471682163989857e-05, "loss": 0.4735, "step": 7223, "task_loss": 0.5037665963172913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3519672453403473, "epoch": 6.11, "learning_rate": 1.9467455621301774e-05, "loss": 0.5246, "step": 7224, "task_loss": 1.0137887001037598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4308398365974426, "epoch": 6.11, "learning_rate": 1.9463229078613697e-05, "loss": 0.3713, "step": 7225, "task_loss": 0.8662633895874023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4749898314476013, "epoch": 6.11, "learning_rate": 1.9459002535925613e-05, "loss": 0.5471, "step": 7226, "task_loss": 0.18816721439361572 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5018928647041321, "epoch": 6.11, "learning_rate": 1.9454775993237533e-05, "loss": 0.4716, "step": 7227, "task_loss": 0.34584859013557434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48448842763900757, "epoch": 6.11, "learning_rate": 1.9450549450549452e-05, "loss": 0.4319, "step": 7228, "task_loss": 0.7106772065162659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.600232720375061, "epoch": 6.11, "learning_rate": 1.944632290786137e-05, "loss": 0.4593, "step": 7229, "task_loss": 0.5822135210037231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33292192220687866, "epoch": 6.11, "learning_rate": 1.944209636517329e-05, "loss": 0.3745, "step": 7230, "task_loss": 0.2658970355987549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3390076756477356, "epoch": 6.11, "learning_rate": 1.9437869822485208e-05, "loss": 0.46, "step": 7231, "task_loss": 0.622743546962738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39441582560539246, "epoch": 6.11, "learning_rate": 1.9433643279797125e-05, "loss": 0.3995, "step": 7232, "task_loss": 0.19118547439575195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36146610975265503, "epoch": 6.11, "learning_rate": 1.9429416737109048e-05, "loss": 0.3749, "step": 7233, "task_loss": 0.6277105212211609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41977620124816895, "epoch": 6.11, "learning_rate": 1.9425190194420964e-05, "loss": 0.4191, "step": 7234, "task_loss": 0.5643138289451599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6952922344207764, "epoch": 6.12, "learning_rate": 1.9420963651732884e-05, "loss": 0.5151, "step": 7235, "task_loss": 0.4672245681285858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38035303354263306, "epoch": 6.12, "learning_rate": 1.9416737109044804e-05, "loss": 0.4027, "step": 7236, "task_loss": 0.6459356546401978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7246323823928833, "epoch": 6.12, "learning_rate": 1.941251056635672e-05, "loss": 0.5131, "step": 7237, "task_loss": 1.1017228364944458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2764829099178314, "epoch": 6.12, "learning_rate": 1.940828402366864e-05, "loss": 0.3546, "step": 7238, "task_loss": 0.1499372273683548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45127758383750916, "epoch": 6.12, "learning_rate": 1.940405748098056e-05, "loss": 0.4293, "step": 7239, "task_loss": 1.313875436782837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6153087019920349, "epoch": 6.12, "learning_rate": 1.9399830938292476e-05, "loss": 0.5704, "step": 7240, "task_loss": 0.29242271184921265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43569034337997437, "epoch": 6.12, "learning_rate": 1.9395604395604396e-05, "loss": 0.3923, "step": 7241, "task_loss": 0.5791569948196411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3548681437969208, "epoch": 6.12, "learning_rate": 1.9391377852916315e-05, "loss": 0.4402, "step": 7242, "task_loss": 0.6347848176956177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2748337686061859, "epoch": 6.12, "learning_rate": 1.9387151310228235e-05, "loss": 0.3238, "step": 7243, "task_loss": 0.5896656513214111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5457012057304382, "epoch": 6.12, "learning_rate": 1.9382924767540155e-05, "loss": 0.4938, "step": 7244, "task_loss": 0.5273101329803467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.647644579410553, "epoch": 6.12, "learning_rate": 1.937869822485207e-05, "loss": 0.5782, "step": 7245, "task_loss": 0.6769357323646545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30943426489830017, "epoch": 6.13, "learning_rate": 1.937447168216399e-05, "loss": 0.4299, "step": 7246, "task_loss": 0.7686152458190918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43437010049819946, "epoch": 6.13, "learning_rate": 1.937024513947591e-05, "loss": 0.4078, "step": 7247, "task_loss": 1.0504783391952515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5804103016853333, "epoch": 6.13, "learning_rate": 1.9366018596787827e-05, "loss": 0.4329, "step": 7248, "task_loss": 0.7005054354667664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5804166793823242, "epoch": 6.13, "learning_rate": 1.9361792054099747e-05, "loss": 0.3514, "step": 7249, "task_loss": 0.6372331380844116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4440772235393524, "epoch": 6.13, "learning_rate": 1.9357565511411666e-05, "loss": 0.4097, "step": 7250, "task_loss": 0.7166685461997986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44156256318092346, "epoch": 6.13, "learning_rate": 1.9353338968723586e-05, "loss": 0.3918, "step": 7251, "task_loss": 0.9399154186248779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5595995187759399, "epoch": 6.13, "learning_rate": 1.9349112426035502e-05, "loss": 0.5349, "step": 7252, "task_loss": 0.22627943754196167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3984464406967163, "epoch": 6.13, "learning_rate": 1.9344885883347422e-05, "loss": 0.4581, "step": 7253, "task_loss": 0.2679952085018158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3562823235988617, "epoch": 6.13, "learning_rate": 1.9340659340659342e-05, "loss": 0.5453, "step": 7254, "task_loss": 0.6950831413269043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2506830096244812, "epoch": 6.13, "learning_rate": 1.933643279797126e-05, "loss": 0.3891, "step": 7255, "task_loss": 0.5149086713790894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39791470766067505, "epoch": 6.13, "learning_rate": 1.933220625528318e-05, "loss": 0.4162, "step": 7256, "task_loss": 0.4197749197483063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5846793055534363, "epoch": 6.13, "learning_rate": 1.9327979712595098e-05, "loss": 0.381, "step": 7257, "task_loss": 0.8309436440467834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.458749920129776, "epoch": 6.14, "learning_rate": 1.9323753169907018e-05, "loss": 0.6184, "step": 7258, "task_loss": 0.9189620614051819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24357575178146362, "epoch": 6.14, "learning_rate": 1.9319526627218937e-05, "loss": 0.3191, "step": 7259, "task_loss": 0.3334467113018036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4442724883556366, "epoch": 6.14, "learning_rate": 1.9315300084530854e-05, "loss": 0.4614, "step": 7260, "task_loss": 0.6521475315093994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42344123125076294, "epoch": 6.14, "learning_rate": 1.9311073541842773e-05, "loss": 0.361, "step": 7261, "task_loss": 0.05495256185531616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24567323923110962, "epoch": 6.14, "learning_rate": 1.9306846999154693e-05, "loss": 0.3256, "step": 7262, "task_loss": 0.022114817053079605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2565224766731262, "epoch": 6.14, "learning_rate": 1.930262045646661e-05, "loss": 0.5277, "step": 7263, "task_loss": 0.6957934498786926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35377541184425354, "epoch": 6.14, "learning_rate": 1.9298393913778533e-05, "loss": 0.4873, "step": 7264, "task_loss": 2.5892395973205566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4806590974330902, "epoch": 6.14, "learning_rate": 1.929416737109045e-05, "loss": 0.4966, "step": 7265, "task_loss": 0.9655662775039673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6087069511413574, "epoch": 6.14, "learning_rate": 1.9289940828402365e-05, "loss": 0.6755, "step": 7266, "task_loss": 0.4365273714065552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39462003111839294, "epoch": 6.14, "learning_rate": 1.928571428571429e-05, "loss": 0.3276, "step": 7267, "task_loss": 0.9629340767860413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36402931809425354, "epoch": 6.14, "learning_rate": 1.9281487743026205e-05, "loss": 0.4256, "step": 7268, "task_loss": 0.797833263874054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31495988368988037, "epoch": 6.14, "learning_rate": 1.9277261200338124e-05, "loss": 0.4138, "step": 7269, "task_loss": 0.2734087407588959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9992058873176575, "epoch": 6.15, "learning_rate": 1.9273034657650044e-05, "loss": 0.5954, "step": 7270, "task_loss": 0.3763977885246277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6159965991973877, "epoch": 6.15, "learning_rate": 1.926880811496196e-05, "loss": 0.5735, "step": 7271, "task_loss": 0.38052043318748474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3040849566459656, "epoch": 6.15, "learning_rate": 1.9264581572273884e-05, "loss": 0.3337, "step": 7272, "task_loss": 0.2077777087688446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7303988337516785, "epoch": 6.15, "learning_rate": 1.92603550295858e-05, "loss": 0.4382, "step": 7273, "task_loss": 0.4692193865776062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3083440661430359, "epoch": 6.15, "learning_rate": 1.9256128486897716e-05, "loss": 0.391, "step": 7274, "task_loss": 0.7587782144546509 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44039517641067505, "epoch": 6.15, "learning_rate": 1.925190194420964e-05, "loss": 0.5151, "step": 7275, "task_loss": 0.9947290420532227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7071822285652161, "epoch": 6.15, "learning_rate": 1.9247675401521556e-05, "loss": 0.6124, "step": 7276, "task_loss": 1.531227707862854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3843895196914673, "epoch": 6.15, "learning_rate": 1.9243448858833472e-05, "loss": 0.4671, "step": 7277, "task_loss": 0.23336240649223328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5710169076919556, "epoch": 6.15, "learning_rate": 1.9239222316145395e-05, "loss": 0.4727, "step": 7278, "task_loss": 0.10760960727930069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31109243631362915, "epoch": 6.15, "learning_rate": 1.923499577345731e-05, "loss": 0.4631, "step": 7279, "task_loss": 0.43082380294799805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36493390798568726, "epoch": 6.15, "learning_rate": 1.923076923076923e-05, "loss": 0.3367, "step": 7280, "task_loss": 0.49687132239341736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41362395882606506, "epoch": 6.15, "learning_rate": 1.922654268808115e-05, "loss": 0.4922, "step": 7281, "task_loss": 0.748630166053772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47343310713768005, "epoch": 6.16, "learning_rate": 1.9222316145393068e-05, "loss": 0.5065, "step": 7282, "task_loss": 0.4317403733730316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3707965910434723, "epoch": 6.16, "learning_rate": 1.9218089602704987e-05, "loss": 0.5196, "step": 7283, "task_loss": 0.6811929941177368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3478126525878906, "epoch": 6.16, "learning_rate": 1.9213863060016907e-05, "loss": 0.2901, "step": 7284, "task_loss": 0.36637598276138306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31879478693008423, "epoch": 6.16, "learning_rate": 1.9209636517328827e-05, "loss": 0.4351, "step": 7285, "task_loss": 0.34347429871559143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5941683053970337, "epoch": 6.16, "learning_rate": 1.9205409974640746e-05, "loss": 0.5538, "step": 7286, "task_loss": 0.5448051691055298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34017014503479004, "epoch": 6.16, "learning_rate": 1.9201183431952663e-05, "loss": 0.4121, "step": 7287, "task_loss": 0.09873310476541519 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3984287977218628, "epoch": 6.16, "learning_rate": 1.9196956889264583e-05, "loss": 0.3567, "step": 7288, "task_loss": 0.3622848093509674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35995280742645264, "epoch": 6.16, "learning_rate": 1.9192730346576502e-05, "loss": 0.5538, "step": 7289, "task_loss": 1.3703365325927734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4665844738483429, "epoch": 6.16, "learning_rate": 1.918850380388842e-05, "loss": 0.3651, "step": 7290, "task_loss": 0.4525938332080841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4479590058326721, "epoch": 6.16, "learning_rate": 1.918427726120034e-05, "loss": 0.5299, "step": 7291, "task_loss": 0.2775273621082306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29000866413116455, "epoch": 6.16, "learning_rate": 1.9180050718512258e-05, "loss": 0.4321, "step": 7292, "task_loss": 0.5615448355674744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6213821172714233, "epoch": 6.16, "learning_rate": 1.9175824175824178e-05, "loss": 0.4756, "step": 7293, "task_loss": 0.6850985288619995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3296683132648468, "epoch": 6.17, "learning_rate": 1.9171597633136094e-05, "loss": 0.3898, "step": 7294, "task_loss": 0.6386235952377319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30037155747413635, "epoch": 6.17, "learning_rate": 1.9167371090448014e-05, "loss": 0.3686, "step": 7295, "task_loss": 0.15430592000484467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5931505560874939, "epoch": 6.17, "learning_rate": 1.9163144547759934e-05, "loss": 0.5338, "step": 7296, "task_loss": 0.724937915802002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8483796715736389, "epoch": 6.17, "learning_rate": 1.9158918005071853e-05, "loss": 0.5767, "step": 7297, "task_loss": 0.4056936502456665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6271671056747437, "epoch": 6.17, "learning_rate": 1.915469146238377e-05, "loss": 0.3816, "step": 7298, "task_loss": 0.635051429271698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3000023066997528, "epoch": 6.17, "learning_rate": 1.915046491969569e-05, "loss": 0.3515, "step": 7299, "task_loss": 0.8585831522941589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3297983705997467, "epoch": 6.17, "learning_rate": 1.914623837700761e-05, "loss": 0.4136, "step": 7300, "task_loss": 0.7414449453353882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8679963946342468, "epoch": 6.17, "learning_rate": 1.914201183431953e-05, "loss": 0.5139, "step": 7301, "task_loss": 1.1205123662948608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20349545776844025, "epoch": 6.17, "learning_rate": 1.9137785291631445e-05, "loss": 0.3401, "step": 7302, "task_loss": 0.12631426751613617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3698154389858246, "epoch": 6.17, "learning_rate": 1.9133558748943365e-05, "loss": 0.4391, "step": 7303, "task_loss": 0.3198661208152771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32010409235954285, "epoch": 6.17, "learning_rate": 1.9129332206255285e-05, "loss": 0.4392, "step": 7304, "task_loss": 0.6103374361991882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41391831636428833, "epoch": 6.17, "learning_rate": 1.91251056635672e-05, "loss": 0.566, "step": 7305, "task_loss": 1.4587033987045288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2899538278579712, "epoch": 6.18, "learning_rate": 1.912087912087912e-05, "loss": 0.3796, "step": 7306, "task_loss": 0.05296706408262253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46851879358291626, "epoch": 6.18, "learning_rate": 1.911665257819104e-05, "loss": 0.4386, "step": 7307, "task_loss": 0.16368341445922852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7770696878433228, "epoch": 6.18, "learning_rate": 1.911242603550296e-05, "loss": 0.4877, "step": 7308, "task_loss": 1.6495201587677002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.466976523399353, "epoch": 6.18, "learning_rate": 1.910819949281488e-05, "loss": 0.496, "step": 7309, "task_loss": 0.8720992803573608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25297194719314575, "epoch": 6.18, "learning_rate": 1.9103972950126796e-05, "loss": 0.424, "step": 7310, "task_loss": 0.2618497610092163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5290127396583557, "epoch": 6.18, "learning_rate": 1.9099746407438716e-05, "loss": 0.4765, "step": 7311, "task_loss": 0.5330207943916321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3235194683074951, "epoch": 6.18, "learning_rate": 1.9095519864750636e-05, "loss": 0.4227, "step": 7312, "task_loss": 0.39497196674346924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3416614234447479, "epoch": 6.18, "learning_rate": 1.9091293322062552e-05, "loss": 0.4681, "step": 7313, "task_loss": 0.3449939489364624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3195849061012268, "epoch": 6.18, "learning_rate": 1.9087066779374475e-05, "loss": 0.3835, "step": 7314, "task_loss": 0.22382360696792603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31678280234336853, "epoch": 6.18, "learning_rate": 1.9082840236686392e-05, "loss": 0.3901, "step": 7315, "task_loss": 0.738714873790741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3156195282936096, "epoch": 6.18, "learning_rate": 1.9078613693998308e-05, "loss": 0.4359, "step": 7316, "task_loss": 0.4473409354686737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44515860080718994, "epoch": 6.19, "learning_rate": 1.907438715131023e-05, "loss": 0.5025, "step": 7317, "task_loss": 0.3482765257358551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23016749322414398, "epoch": 6.19, "learning_rate": 1.9070160608622148e-05, "loss": 0.3331, "step": 7318, "task_loss": 0.5439857244491577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33232271671295166, "epoch": 6.19, "learning_rate": 1.9065934065934067e-05, "loss": 0.3679, "step": 7319, "task_loss": 0.6494312882423401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47448423504829407, "epoch": 6.19, "learning_rate": 1.9061707523245987e-05, "loss": 0.4674, "step": 7320, "task_loss": 1.3593417406082153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5232985615730286, "epoch": 6.19, "learning_rate": 1.9057480980557903e-05, "loss": 0.5416, "step": 7321, "task_loss": 1.7466336488723755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43863600492477417, "epoch": 6.19, "learning_rate": 1.9053254437869823e-05, "loss": 0.4628, "step": 7322, "task_loss": 0.8731913566589355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3592334985733032, "epoch": 6.19, "learning_rate": 1.9049027895181743e-05, "loss": 0.424, "step": 7323, "task_loss": 0.751307487487793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5468201041221619, "epoch": 6.19, "learning_rate": 1.904480135249366e-05, "loss": 0.4954, "step": 7324, "task_loss": 0.4092971086502075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45807576179504395, "epoch": 6.19, "learning_rate": 1.9040574809805582e-05, "loss": 0.4143, "step": 7325, "task_loss": 0.6595118045806885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.235312819480896, "epoch": 6.19, "learning_rate": 1.90363482671175e-05, "loss": 0.4287, "step": 7326, "task_loss": 0.14099450409412384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24788406491279602, "epoch": 6.19, "learning_rate": 1.9032121724429415e-05, "loss": 0.4505, "step": 7327, "task_loss": 0.16480973362922668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19721949100494385, "epoch": 6.19, "learning_rate": 1.9027895181741338e-05, "loss": 0.3314, "step": 7328, "task_loss": 0.5221731662750244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20585167407989502, "epoch": 6.2, "learning_rate": 1.9023668639053255e-05, "loss": 0.3875, "step": 7329, "task_loss": 0.7031477689743042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4062773883342743, "epoch": 6.2, "learning_rate": 1.9019442096365174e-05, "loss": 0.4376, "step": 7330, "task_loss": 0.308798223733902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32946330308914185, "epoch": 6.2, "learning_rate": 1.9015215553677094e-05, "loss": 0.4217, "step": 7331, "task_loss": 0.3941155970096588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7146567106246948, "epoch": 6.2, "learning_rate": 1.901098901098901e-05, "loss": 0.6376, "step": 7332, "task_loss": 0.8359385132789612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2814042866230011, "epoch": 6.2, "learning_rate": 1.900676246830093e-05, "loss": 0.3658, "step": 7333, "task_loss": 0.8458582758903503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31023794412612915, "epoch": 6.2, "learning_rate": 1.900253592561285e-05, "loss": 0.3802, "step": 7334, "task_loss": 0.7970231175422668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3722670078277588, "epoch": 6.2, "learning_rate": 1.8998309382924766e-05, "loss": 0.3897, "step": 7335, "task_loss": 0.8856242895126343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2682872414588928, "epoch": 6.2, "learning_rate": 1.899408284023669e-05, "loss": 0.3356, "step": 7336, "task_loss": 0.044519949704408646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3589381277561188, "epoch": 6.2, "learning_rate": 1.8989856297548606e-05, "loss": 0.4341, "step": 7337, "task_loss": 0.410133421421051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6460956335067749, "epoch": 6.2, "learning_rate": 1.8985629754860525e-05, "loss": 0.5067, "step": 7338, "task_loss": 0.7695217728614807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22184737026691437, "epoch": 6.2, "learning_rate": 1.8981403212172445e-05, "loss": 0.3334, "step": 7339, "task_loss": 0.5770419836044312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22460484504699707, "epoch": 6.2, "learning_rate": 1.897717666948436e-05, "loss": 0.4166, "step": 7340, "task_loss": 0.26320019364356995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27416718006134033, "epoch": 6.21, "learning_rate": 1.897295012679628e-05, "loss": 0.4181, "step": 7341, "task_loss": 0.42398548126220703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37002819776535034, "epoch": 6.21, "learning_rate": 1.89687235841082e-05, "loss": 0.3635, "step": 7342, "task_loss": 0.6759760975837708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2989233732223511, "epoch": 6.21, "learning_rate": 1.896449704142012e-05, "loss": 0.3555, "step": 7343, "task_loss": 0.39603909850120544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.571068286895752, "epoch": 6.21, "learning_rate": 1.8960270498732037e-05, "loss": 0.4749, "step": 7344, "task_loss": 0.3604673445224762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6240682601928711, "epoch": 6.21, "learning_rate": 1.8956043956043957e-05, "loss": 0.4988, "step": 7345, "task_loss": 0.8339646458625793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27595847845077515, "epoch": 6.21, "learning_rate": 1.8951817413355877e-05, "loss": 0.4719, "step": 7346, "task_loss": 0.2777312397956848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6711755990982056, "epoch": 6.21, "learning_rate": 1.8947590870667793e-05, "loss": 0.4439, "step": 7347, "task_loss": 0.9724555015563965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3729579448699951, "epoch": 6.21, "learning_rate": 1.8943364327979713e-05, "loss": 0.429, "step": 7348, "task_loss": 0.1850886195898056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.679522693157196, "epoch": 6.21, "learning_rate": 1.8939137785291632e-05, "loss": 0.6091, "step": 7349, "task_loss": 1.6236156225204468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20857428014278412, "epoch": 6.21, "learning_rate": 1.8934911242603552e-05, "loss": 0.3602, "step": 7350, "task_loss": 0.10442803055047989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3147681951522827, "epoch": 6.21, "learning_rate": 1.8930684699915472e-05, "loss": 0.4358, "step": 7351, "task_loss": 0.9983011484146118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.566206693649292, "epoch": 6.21, "learning_rate": 1.8926458157227388e-05, "loss": 0.6135, "step": 7352, "task_loss": 0.8196436762809753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6755408644676208, "epoch": 6.22, "learning_rate": 1.8922231614539308e-05, "loss": 0.6164, "step": 7353, "task_loss": 1.0270711183547974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7381584644317627, "epoch": 6.22, "learning_rate": 1.8918005071851228e-05, "loss": 0.5472, "step": 7354, "task_loss": 1.1247092485427856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6968719959259033, "epoch": 6.22, "learning_rate": 1.8913778529163144e-05, "loss": 0.4274, "step": 7355, "task_loss": 0.5901756286621094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.352206826210022, "epoch": 6.22, "learning_rate": 1.8909551986475064e-05, "loss": 0.4541, "step": 7356, "task_loss": 0.8579646944999695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42863553762435913, "epoch": 6.22, "learning_rate": 1.8905325443786984e-05, "loss": 0.3989, "step": 7357, "task_loss": 0.27568715810775757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5466806888580322, "epoch": 6.22, "learning_rate": 1.89010989010989e-05, "loss": 0.4663, "step": 7358, "task_loss": 0.26437240839004517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30731597542762756, "epoch": 6.22, "learning_rate": 1.8896872358410823e-05, "loss": 0.3161, "step": 7359, "task_loss": 1.0283024311065674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39132875204086304, "epoch": 6.22, "learning_rate": 1.889264581572274e-05, "loss": 0.3396, "step": 7360, "task_loss": 0.43007567524909973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5204179286956787, "epoch": 6.22, "learning_rate": 1.888841927303466e-05, "loss": 0.4599, "step": 7361, "task_loss": 0.6762171387672424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40995627641677856, "epoch": 6.22, "learning_rate": 1.888419273034658e-05, "loss": 0.3663, "step": 7362, "task_loss": 0.5207911729812622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19905410706996918, "epoch": 6.22, "learning_rate": 1.8879966187658495e-05, "loss": 0.4125, "step": 7363, "task_loss": 0.577556848526001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7188290357589722, "epoch": 6.22, "learning_rate": 1.8875739644970415e-05, "loss": 0.5165, "step": 7364, "task_loss": 0.7827625274658203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5546907186508179, "epoch": 6.23, "learning_rate": 1.8871513102282335e-05, "loss": 0.5043, "step": 7365, "task_loss": 0.6529926657676697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14544931054115295, "epoch": 6.23, "learning_rate": 1.886728655959425e-05, "loss": 0.4328, "step": 7366, "task_loss": 0.0627792701125145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39158856868743896, "epoch": 6.23, "learning_rate": 1.8863060016906174e-05, "loss": 0.4291, "step": 7367, "task_loss": 0.7068882584571838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2912377715110779, "epoch": 6.23, "learning_rate": 1.885883347421809e-05, "loss": 0.3664, "step": 7368, "task_loss": 0.29457083344459534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3838551342487335, "epoch": 6.23, "learning_rate": 1.8854606931530007e-05, "loss": 0.4302, "step": 7369, "task_loss": 0.605881929397583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5682333707809448, "epoch": 6.23, "learning_rate": 1.885038038884193e-05, "loss": 0.4513, "step": 7370, "task_loss": 0.5042519569396973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6683791875839233, "epoch": 6.23, "learning_rate": 1.8846153846153846e-05, "loss": 0.5397, "step": 7371, "task_loss": 0.6774916648864746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4389325976371765, "epoch": 6.23, "learning_rate": 1.8841927303465766e-05, "loss": 0.4626, "step": 7372, "task_loss": 0.27493783831596375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8418166637420654, "epoch": 6.23, "learning_rate": 1.8837700760777686e-05, "loss": 0.4297, "step": 7373, "task_loss": 1.1720597743988037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5171328783035278, "epoch": 6.23, "learning_rate": 1.8833474218089602e-05, "loss": 0.4441, "step": 7374, "task_loss": 0.5749563574790955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4629538059234619, "epoch": 6.23, "learning_rate": 1.8829247675401522e-05, "loss": 0.3852, "step": 7375, "task_loss": 0.6483286619186401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4664277136325836, "epoch": 6.23, "learning_rate": 1.882502113271344e-05, "loss": 0.5763, "step": 7376, "task_loss": 0.945827066898346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49366310238838196, "epoch": 6.24, "learning_rate": 1.8820794590025358e-05, "loss": 0.4849, "step": 7377, "task_loss": 0.6275193095207214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3395857810974121, "epoch": 6.24, "learning_rate": 1.881656804733728e-05, "loss": 0.5228, "step": 7378, "task_loss": 0.7266091704368591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3572564721107483, "epoch": 6.24, "learning_rate": 1.8812341504649197e-05, "loss": 0.5073, "step": 7379, "task_loss": 0.5705186724662781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28859540820121765, "epoch": 6.24, "learning_rate": 1.8808114961961117e-05, "loss": 0.3876, "step": 7380, "task_loss": 0.5783632397651672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.381360799074173, "epoch": 6.24, "learning_rate": 1.8803888419273037e-05, "loss": 0.4359, "step": 7381, "task_loss": 1.1658483743667603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5316799879074097, "epoch": 6.24, "learning_rate": 1.8799661876584953e-05, "loss": 0.5222, "step": 7382, "task_loss": 0.44763651490211487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4617660641670227, "epoch": 6.24, "learning_rate": 1.8795435333896873e-05, "loss": 0.4709, "step": 7383, "task_loss": 1.060294508934021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23650670051574707, "epoch": 6.24, "learning_rate": 1.8791208791208793e-05, "loss": 0.397, "step": 7384, "task_loss": 0.11781829595565796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4250749945640564, "epoch": 6.24, "learning_rate": 1.878698224852071e-05, "loss": 0.3795, "step": 7385, "task_loss": 0.5753234028816223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6258448362350464, "epoch": 6.24, "learning_rate": 1.878275570583263e-05, "loss": 0.4174, "step": 7386, "task_loss": 1.0588935613632202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.575982928276062, "epoch": 6.24, "learning_rate": 1.877852916314455e-05, "loss": 0.5323, "step": 7387, "task_loss": 0.9774614572525024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5572336912155151, "epoch": 6.24, "learning_rate": 1.877430262045647e-05, "loss": 0.5596, "step": 7388, "task_loss": 0.24310147762298584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35478606820106506, "epoch": 6.25, "learning_rate": 1.8770076077768388e-05, "loss": 0.4239, "step": 7389, "task_loss": 0.7384392023086548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3330371379852295, "epoch": 6.25, "learning_rate": 1.8765849535080304e-05, "loss": 0.3789, "step": 7390, "task_loss": 0.21040931344032288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6635715365409851, "epoch": 6.25, "learning_rate": 1.8761622992392224e-05, "loss": 0.4656, "step": 7391, "task_loss": 0.5885238647460938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.500828206539154, "epoch": 6.25, "learning_rate": 1.8757396449704144e-05, "loss": 0.3904, "step": 7392, "task_loss": 0.7408778071403503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5223671197891235, "epoch": 6.25, "learning_rate": 1.875316990701606e-05, "loss": 0.5171, "step": 7393, "task_loss": 1.0610510110855103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18094712495803833, "epoch": 6.25, "learning_rate": 1.874894336432798e-05, "loss": 0.3785, "step": 7394, "task_loss": 0.6333187818527222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6549341678619385, "epoch": 6.25, "learning_rate": 1.87447168216399e-05, "loss": 0.4936, "step": 7395, "task_loss": 0.7159588932991028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2940424680709839, "epoch": 6.25, "learning_rate": 1.874049027895182e-05, "loss": 0.3755, "step": 7396, "task_loss": 0.160593181848526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4882940649986267, "epoch": 6.25, "learning_rate": 1.8736263736263736e-05, "loss": 0.4114, "step": 7397, "task_loss": 0.9935579895973206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.528218686580658, "epoch": 6.25, "learning_rate": 1.8732037193575656e-05, "loss": 0.4726, "step": 7398, "task_loss": 0.09689510613679886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6261005401611328, "epoch": 6.25, "learning_rate": 1.8727810650887575e-05, "loss": 0.4897, "step": 7399, "task_loss": 1.3823537826538086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29487594962120056, "epoch": 6.26, "learning_rate": 1.8723584108199495e-05, "loss": 0.3328, "step": 7400, "task_loss": 0.7492702603340149 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4351596236228943, "epoch": 6.26, "learning_rate": 1.871935756551141e-05, "loss": 0.4276, "step": 7401, "task_loss": 0.36651086807250977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5562874674797058, "epoch": 6.26, "learning_rate": 1.871513102282333e-05, "loss": 0.3936, "step": 7402, "task_loss": 0.32533732056617737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2341289520263672, "epoch": 6.26, "learning_rate": 1.871090448013525e-05, "loss": 0.3087, "step": 7403, "task_loss": 0.3589152693748474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46211016178131104, "epoch": 6.26, "learning_rate": 1.870667793744717e-05, "loss": 0.4089, "step": 7404, "task_loss": 0.07596525549888611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31350022554397583, "epoch": 6.26, "learning_rate": 1.8702451394759087e-05, "loss": 0.4579, "step": 7405, "task_loss": 0.736756443977356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3533334732055664, "epoch": 6.26, "learning_rate": 1.8698224852071007e-05, "loss": 0.461, "step": 7406, "task_loss": 0.7795369625091553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36756011843681335, "epoch": 6.26, "learning_rate": 1.8693998309382926e-05, "loss": 0.3735, "step": 7407, "task_loss": 0.20933058857917786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43795180320739746, "epoch": 6.26, "learning_rate": 1.8689771766694843e-05, "loss": 0.5907, "step": 7408, "task_loss": 0.47002172470092773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5881123542785645, "epoch": 6.26, "learning_rate": 1.8685545224006766e-05, "loss": 0.4426, "step": 7409, "task_loss": 0.9388710260391235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5761983394622803, "epoch": 6.26, "learning_rate": 1.8681318681318682e-05, "loss": 0.5937, "step": 7410, "task_loss": 0.7771567106246948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4625416398048401, "epoch": 6.26, "learning_rate": 1.86770921386306e-05, "loss": 0.3953, "step": 7411, "task_loss": 0.5007261633872986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25369852781295776, "epoch": 6.27, "learning_rate": 1.8672865595942522e-05, "loss": 0.3196, "step": 7412, "task_loss": 0.7372127771377563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39217662811279297, "epoch": 6.27, "learning_rate": 1.8668639053254438e-05, "loss": 0.567, "step": 7413, "task_loss": 0.9854080080986023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28860050439834595, "epoch": 6.27, "learning_rate": 1.8664412510566358e-05, "loss": 0.3232, "step": 7414, "task_loss": 0.4750038981437683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4465206563472748, "epoch": 6.27, "learning_rate": 1.8660185967878278e-05, "loss": 0.4259, "step": 7415, "task_loss": 0.5471699833869934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6128970384597778, "epoch": 6.27, "learning_rate": 1.8655959425190194e-05, "loss": 0.5968, "step": 7416, "task_loss": 1.4372280836105347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7934972047805786, "epoch": 6.27, "learning_rate": 1.8651732882502117e-05, "loss": 0.5329, "step": 7417, "task_loss": 0.6570456027984619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30409589409828186, "epoch": 6.27, "learning_rate": 1.8647506339814033e-05, "loss": 0.3209, "step": 7418, "task_loss": 0.16773465275764465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25856998562812805, "epoch": 6.27, "learning_rate": 1.864327979712595e-05, "loss": 0.3579, "step": 7419, "task_loss": 1.1813669204711914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6655339002609253, "epoch": 6.27, "learning_rate": 1.8639053254437873e-05, "loss": 0.6131, "step": 7420, "task_loss": 1.254402995109558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36151522397994995, "epoch": 6.27, "learning_rate": 1.863482671174979e-05, "loss": 0.4679, "step": 7421, "task_loss": 0.874910295009613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4015306830406189, "epoch": 6.27, "learning_rate": 1.8630600169061706e-05, "loss": 0.4121, "step": 7422, "task_loss": 0.4424337148666382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4657759368419647, "epoch": 6.27, "learning_rate": 1.862637362637363e-05, "loss": 0.2997, "step": 7423, "task_loss": 0.4597998857498169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.263441801071167, "epoch": 6.28, "learning_rate": 1.8622147083685545e-05, "loss": 0.3745, "step": 7424, "task_loss": 0.435884565114975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3490613102912903, "epoch": 6.28, "learning_rate": 1.8617920540997465e-05, "loss": 0.4627, "step": 7425, "task_loss": 1.2124418020248413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25691765546798706, "epoch": 6.28, "learning_rate": 1.8613693998309385e-05, "loss": 0.3247, "step": 7426, "task_loss": 0.3420126736164093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5784562826156616, "epoch": 6.28, "learning_rate": 1.86094674556213e-05, "loss": 0.4764, "step": 7427, "task_loss": 0.6915899515151978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3411710262298584, "epoch": 6.28, "learning_rate": 1.860524091293322e-05, "loss": 0.458, "step": 7428, "task_loss": 0.3122914731502533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3249645233154297, "epoch": 6.28, "learning_rate": 1.860101437024514e-05, "loss": 0.3601, "step": 7429, "task_loss": 0.09959448873996735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22226080298423767, "epoch": 6.28, "learning_rate": 1.8596787827557057e-05, "loss": 0.3633, "step": 7430, "task_loss": 0.3487250506877899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6091804504394531, "epoch": 6.28, "learning_rate": 1.859256128486898e-05, "loss": 0.4972, "step": 7431, "task_loss": 0.4621986448764801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48916590213775635, "epoch": 6.28, "learning_rate": 1.8588334742180896e-05, "loss": 0.4447, "step": 7432, "task_loss": 0.557756781578064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1964636594057083, "epoch": 6.28, "learning_rate": 1.8584108199492816e-05, "loss": 0.3794, "step": 7433, "task_loss": 0.3018451929092407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7813000679016113, "epoch": 6.28, "learning_rate": 1.8579881656804736e-05, "loss": 0.5741, "step": 7434, "task_loss": 0.18632301688194275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27533307671546936, "epoch": 6.28, "learning_rate": 1.8575655114116652e-05, "loss": 0.4389, "step": 7435, "task_loss": 0.09850893169641495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20819804072380066, "epoch": 6.29, "learning_rate": 1.8571428571428572e-05, "loss": 0.3719, "step": 7436, "task_loss": 0.022275356575846672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5539222955703735, "epoch": 6.29, "learning_rate": 1.856720202874049e-05, "loss": 0.5492, "step": 7437, "task_loss": 0.5539168119430542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5495555400848389, "epoch": 6.29, "learning_rate": 1.856297548605241e-05, "loss": 0.5626, "step": 7438, "task_loss": 0.12057632207870483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3981839418411255, "epoch": 6.29, "learning_rate": 1.8558748943364328e-05, "loss": 0.4617, "step": 7439, "task_loss": 0.33110612630844116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6272201538085938, "epoch": 6.29, "learning_rate": 1.8554522400676247e-05, "loss": 0.4235, "step": 7440, "task_loss": 1.143898606300354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37437906861305237, "epoch": 6.29, "learning_rate": 1.8550295857988167e-05, "loss": 0.408, "step": 7441, "task_loss": 0.6268939971923828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4869462847709656, "epoch": 6.29, "learning_rate": 1.8546069315300087e-05, "loss": 0.4792, "step": 7442, "task_loss": 1.1253035068511963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5036808252334595, "epoch": 6.29, "learning_rate": 1.8541842772612003e-05, "loss": 0.5781, "step": 7443, "task_loss": 1.171592116355896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4199615716934204, "epoch": 6.29, "learning_rate": 1.8537616229923923e-05, "loss": 0.4245, "step": 7444, "task_loss": 0.049844495952129364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.161430224776268, "epoch": 6.29, "learning_rate": 1.8533389687235843e-05, "loss": 0.4761, "step": 7445, "task_loss": 0.11534826457500458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4419282376766205, "epoch": 6.29, "learning_rate": 1.8529163144547762e-05, "loss": 0.368, "step": 7446, "task_loss": 0.25295397639274597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43667957186698914, "epoch": 6.29, "learning_rate": 1.852493660185968e-05, "loss": 0.4716, "step": 7447, "task_loss": 0.23077793419361115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4362657070159912, "epoch": 6.3, "learning_rate": 1.85207100591716e-05, "loss": 0.4857, "step": 7448, "task_loss": 0.7477615475654602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5921239852905273, "epoch": 6.3, "learning_rate": 1.8516483516483518e-05, "loss": 0.4881, "step": 7449, "task_loss": 0.7867098450660706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4152717590332031, "epoch": 6.3, "learning_rate": 1.8512256973795435e-05, "loss": 0.3956, "step": 7450, "task_loss": 0.6226378083229065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45414963364601135, "epoch": 6.3, "learning_rate": 1.8508030431107354e-05, "loss": 0.4763, "step": 7451, "task_loss": 0.8678309917449951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3037633001804352, "epoch": 6.3, "learning_rate": 1.8503803888419274e-05, "loss": 0.4174, "step": 7452, "task_loss": 0.057907044887542725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3846437335014343, "epoch": 6.3, "learning_rate": 1.8499577345731194e-05, "loss": 0.3531, "step": 7453, "task_loss": 0.5084137916564941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47334036231040955, "epoch": 6.3, "learning_rate": 1.8495350803043113e-05, "loss": 0.3884, "step": 7454, "task_loss": 0.6242532730102539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.538460910320282, "epoch": 6.3, "learning_rate": 1.849112426035503e-05, "loss": 0.4844, "step": 7455, "task_loss": 0.9625957012176514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3545105457305908, "epoch": 6.3, "learning_rate": 1.848689771766695e-05, "loss": 0.4091, "step": 7456, "task_loss": 0.3145792484283447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5496225953102112, "epoch": 6.3, "learning_rate": 1.848267117497887e-05, "loss": 0.4465, "step": 7457, "task_loss": 0.9730275273323059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4006803631782532, "epoch": 6.3, "learning_rate": 1.8478444632290786e-05, "loss": 0.5432, "step": 7458, "task_loss": 0.5437538623809814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3841736912727356, "epoch": 6.3, "learning_rate": 1.8474218089602705e-05, "loss": 0.4372, "step": 7459, "task_loss": 0.7839620113372803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4889374077320099, "epoch": 6.31, "learning_rate": 1.8469991546914625e-05, "loss": 0.37, "step": 7460, "task_loss": 0.6489946246147156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23489783704280853, "epoch": 6.31, "learning_rate": 1.846576500422654e-05, "loss": 0.3055, "step": 7461, "task_loss": 0.5485289096832275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39727866649627686, "epoch": 6.31, "learning_rate": 1.8461538461538465e-05, "loss": 0.3871, "step": 7462, "task_loss": 0.44308724999427795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.209807351231575, "epoch": 6.31, "learning_rate": 1.845731191885038e-05, "loss": 0.4402, "step": 7463, "task_loss": 0.09618355333805084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2060011327266693, "epoch": 6.31, "learning_rate": 1.84530853761623e-05, "loss": 0.3076, "step": 7464, "task_loss": 0.6820746660232544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2552296221256256, "epoch": 6.31, "learning_rate": 1.844885883347422e-05, "loss": 0.3227, "step": 7465, "task_loss": 0.45945101976394653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28092581033706665, "epoch": 6.31, "learning_rate": 1.8444632290786137e-05, "loss": 0.4415, "step": 7466, "task_loss": 0.6931418180465698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38539350032806396, "epoch": 6.31, "learning_rate": 1.8440405748098057e-05, "loss": 0.4929, "step": 7467, "task_loss": 0.7239944934844971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4543333947658539, "epoch": 6.31, "learning_rate": 1.8436179205409976e-05, "loss": 0.3338, "step": 7468, "task_loss": 0.6084698438644409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31763705611228943, "epoch": 6.31, "learning_rate": 1.8431952662721893e-05, "loss": 0.4451, "step": 7469, "task_loss": 0.12259476631879807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3203537166118622, "epoch": 6.31, "learning_rate": 1.8427726120033816e-05, "loss": 0.47, "step": 7470, "task_loss": 0.08740745484828949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40922850370407104, "epoch": 6.32, "learning_rate": 1.8423499577345732e-05, "loss": 0.4579, "step": 7471, "task_loss": 0.587205171585083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4450766444206238, "epoch": 6.32, "learning_rate": 1.841927303465765e-05, "loss": 0.3305, "step": 7472, "task_loss": 0.3386770188808441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3301679491996765, "epoch": 6.32, "learning_rate": 1.841504649196957e-05, "loss": 0.4391, "step": 7473, "task_loss": 0.5538355112075806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.599484384059906, "epoch": 6.32, "learning_rate": 1.8410819949281488e-05, "loss": 0.5186, "step": 7474, "task_loss": 0.7517311573028564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5912399291992188, "epoch": 6.32, "learning_rate": 1.8406593406593408e-05, "loss": 0.5197, "step": 7475, "task_loss": 0.9786093831062317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6440020799636841, "epoch": 6.32, "learning_rate": 1.8402366863905327e-05, "loss": 0.4693, "step": 7476, "task_loss": 0.3304566740989685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3821730613708496, "epoch": 6.32, "learning_rate": 1.8398140321217244e-05, "loss": 0.4206, "step": 7477, "task_loss": 0.11487775295972824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28783780336380005, "epoch": 6.32, "learning_rate": 1.8393913778529163e-05, "loss": 0.4087, "step": 7478, "task_loss": 0.49114659428596497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5587480068206787, "epoch": 6.32, "learning_rate": 1.8389687235841083e-05, "loss": 0.4221, "step": 7479, "task_loss": 0.9028792381286621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5051053166389465, "epoch": 6.32, "learning_rate": 1.8385460693153e-05, "loss": 0.4592, "step": 7480, "task_loss": 1.2609680891036987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38354945182800293, "epoch": 6.32, "learning_rate": 1.8381234150464923e-05, "loss": 0.4097, "step": 7481, "task_loss": 0.3581293523311615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5226291418075562, "epoch": 6.32, "learning_rate": 1.837700760777684e-05, "loss": 0.5195, "step": 7482, "task_loss": 0.7903541326522827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43954920768737793, "epoch": 6.33, "learning_rate": 1.837278106508876e-05, "loss": 0.3811, "step": 7483, "task_loss": 1.0014567375183105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39188283681869507, "epoch": 6.33, "learning_rate": 1.836855452240068e-05, "loss": 0.4197, "step": 7484, "task_loss": 0.70308518409729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6716598272323608, "epoch": 6.33, "learning_rate": 1.8364327979712595e-05, "loss": 0.3651, "step": 7485, "task_loss": 0.5552574396133423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33321717381477356, "epoch": 6.33, "learning_rate": 1.8360101437024515e-05, "loss": 0.3888, "step": 7486, "task_loss": 0.42140358686447144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29450124502182007, "epoch": 6.33, "learning_rate": 1.8355874894336434e-05, "loss": 0.4155, "step": 7487, "task_loss": 0.6798124313354492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2847437858581543, "epoch": 6.33, "learning_rate": 1.835164835164835e-05, "loss": 0.4167, "step": 7488, "task_loss": 0.2796574532985687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27172374725341797, "epoch": 6.33, "learning_rate": 1.834742180896027e-05, "loss": 0.3815, "step": 7489, "task_loss": 0.09973116219043732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5415390729904175, "epoch": 6.33, "learning_rate": 1.834319526627219e-05, "loss": 0.4668, "step": 7490, "task_loss": 0.14894388616085052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4161244034767151, "epoch": 6.33, "learning_rate": 1.833896872358411e-05, "loss": 0.4886, "step": 7491, "task_loss": 0.42361685633659363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1837972104549408, "epoch": 6.33, "learning_rate": 1.8334742180896026e-05, "loss": 0.3756, "step": 7492, "task_loss": 0.2381788045167923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42369329929351807, "epoch": 6.33, "learning_rate": 1.8330515638207946e-05, "loss": 0.4023, "step": 7493, "task_loss": 0.6435173153877258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4651643633842468, "epoch": 6.33, "learning_rate": 1.8326289095519866e-05, "loss": 0.4336, "step": 7494, "task_loss": 0.5296189188957214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2605997920036316, "epoch": 6.34, "learning_rate": 1.8322062552831785e-05, "loss": 0.3674, "step": 7495, "task_loss": 0.893089771270752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8749069571495056, "epoch": 6.34, "learning_rate": 1.8317836010143705e-05, "loss": 0.494, "step": 7496, "task_loss": 0.9435193538665771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5157427191734314, "epoch": 6.34, "learning_rate": 1.831360946745562e-05, "loss": 0.4746, "step": 7497, "task_loss": 0.6275808215141296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28652673959732056, "epoch": 6.34, "learning_rate": 1.830938292476754e-05, "loss": 0.4654, "step": 7498, "task_loss": 0.36687055230140686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47160595655441284, "epoch": 6.34, "learning_rate": 1.830515638207946e-05, "loss": 0.4602, "step": 7499, "task_loss": 0.590064525604248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5901178121566772, "epoch": 6.34, "learning_rate": 1.8300929839391377e-05, "loss": 0.4143, "step": 7500, "task_loss": 1.0041007995605469 }, { "epoch": 6.34, "eval_accuracy": 0.9095445544554456, "eval_loss": 0.2958400845527649, "eval_runtime": 228.4673, "eval_samples_per_second": 110.519, "eval_steps_per_second": 0.867, "step": 7500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45036858320236206, "epoch": 6.34, "learning_rate": 1.8296703296703297e-05, "loss": 0.386, "step": 7501, "task_loss": 1.1028269529342651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4629344940185547, "epoch": 6.34, "learning_rate": 1.8292476754015217e-05, "loss": 0.5115, "step": 7502, "task_loss": 0.9906623363494873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35838794708251953, "epoch": 6.34, "learning_rate": 1.8288250211327133e-05, "loss": 0.3938, "step": 7503, "task_loss": 0.8167701363563538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.216829314827919, "epoch": 6.34, "learning_rate": 1.8284023668639056e-05, "loss": 0.3553, "step": 7504, "task_loss": 0.158543199300766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.402401864528656, "epoch": 6.34, "learning_rate": 1.8279797125950973e-05, "loss": 0.3801, "step": 7505, "task_loss": 0.20732076466083527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5993769764900208, "epoch": 6.34, "learning_rate": 1.8275570583262892e-05, "loss": 0.5207, "step": 7506, "task_loss": 0.7096792459487915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24969404935836792, "epoch": 6.35, "learning_rate": 1.8271344040574812e-05, "loss": 0.3842, "step": 7507, "task_loss": 0.10812999308109283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.307307630777359, "epoch": 6.35, "learning_rate": 1.826711749788673e-05, "loss": 0.4051, "step": 7508, "task_loss": 1.055645227432251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21905399858951569, "epoch": 6.35, "learning_rate": 1.8262890955198648e-05, "loss": 0.3703, "step": 7509, "task_loss": 0.27905556559562683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7335200309753418, "epoch": 6.35, "learning_rate": 1.8258664412510568e-05, "loss": 0.5096, "step": 7510, "task_loss": 0.6932314038276672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23352962732315063, "epoch": 6.35, "learning_rate": 1.8254437869822484e-05, "loss": 0.3776, "step": 7511, "task_loss": 0.8729584813117981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2948322296142578, "epoch": 6.35, "learning_rate": 1.8250211327134407e-05, "loss": 0.3892, "step": 7512, "task_loss": 0.18286819756031036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3483908176422119, "epoch": 6.35, "learning_rate": 1.8245984784446324e-05, "loss": 0.335, "step": 7513, "task_loss": 0.3157382905483246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.527278482913971, "epoch": 6.35, "learning_rate": 1.824175824175824e-05, "loss": 0.4481, "step": 7514, "task_loss": 0.46831732988357544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2302231341600418, "epoch": 6.35, "learning_rate": 1.8237531699070163e-05, "loss": 0.3349, "step": 7515, "task_loss": 0.5565590262413025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4539869427680969, "epoch": 6.35, "learning_rate": 1.823330515638208e-05, "loss": 0.4749, "step": 7516, "task_loss": 0.6743857860565186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3175102174282074, "epoch": 6.35, "learning_rate": 1.8229078613694e-05, "loss": 0.4083, "step": 7517, "task_loss": 0.31341618299484253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2807077169418335, "epoch": 6.35, "learning_rate": 1.822485207100592e-05, "loss": 0.3712, "step": 7518, "task_loss": 0.28901487588882446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37975335121154785, "epoch": 6.36, "learning_rate": 1.8220625528317836e-05, "loss": 0.3771, "step": 7519, "task_loss": 0.09109170734882355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48064905405044556, "epoch": 6.36, "learning_rate": 1.8216398985629755e-05, "loss": 0.6656, "step": 7520, "task_loss": 0.7549939155578613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48382243514060974, "epoch": 6.36, "learning_rate": 1.8212172442941675e-05, "loss": 0.4908, "step": 7521, "task_loss": 0.6553642749786377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8987942337989807, "epoch": 6.36, "learning_rate": 1.820794590025359e-05, "loss": 0.5819, "step": 7522, "task_loss": 1.512727975845337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31782814860343933, "epoch": 6.36, "learning_rate": 1.8203719357565514e-05, "loss": 0.4287, "step": 7523, "task_loss": 0.025357339531183243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4128161072731018, "epoch": 6.36, "learning_rate": 1.819949281487743e-05, "loss": 0.4255, "step": 7524, "task_loss": 0.6650553941726685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29444167017936707, "epoch": 6.36, "learning_rate": 1.819526627218935e-05, "loss": 0.4643, "step": 7525, "task_loss": 0.5308310985565186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5274129509925842, "epoch": 6.36, "learning_rate": 1.819103972950127e-05, "loss": 0.4392, "step": 7526, "task_loss": 1.0899982452392578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3549845814704895, "epoch": 6.36, "learning_rate": 1.8186813186813187e-05, "loss": 0.3506, "step": 7527, "task_loss": 0.15445595979690552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3621762990951538, "epoch": 6.36, "learning_rate": 1.8182586644125106e-05, "loss": 0.4189, "step": 7528, "task_loss": 0.4782595932483673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36561524868011475, "epoch": 6.36, "learning_rate": 1.8178360101437026e-05, "loss": 0.4063, "step": 7529, "task_loss": 0.9964373707771301 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5830185413360596, "epoch": 6.36, "learning_rate": 1.8174133558748942e-05, "loss": 0.5059, "step": 7530, "task_loss": 0.39107224345207214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2264605313539505, "epoch": 6.37, "learning_rate": 1.8169907016060862e-05, "loss": 0.3731, "step": 7531, "task_loss": 0.14437159895896912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42552751302719116, "epoch": 6.37, "learning_rate": 1.8165680473372782e-05, "loss": 0.4535, "step": 7532, "task_loss": 0.8140941858291626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6502187252044678, "epoch": 6.37, "learning_rate": 1.81614539306847e-05, "loss": 0.5448, "step": 7533, "task_loss": 0.17116880416870117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33767756819725037, "epoch": 6.37, "learning_rate": 1.815722738799662e-05, "loss": 0.4523, "step": 7534, "task_loss": 0.5321668982505798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45479732751846313, "epoch": 6.37, "learning_rate": 1.8153000845308538e-05, "loss": 0.3906, "step": 7535, "task_loss": 0.5577519536018372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33437007665634155, "epoch": 6.37, "learning_rate": 1.8148774302620458e-05, "loss": 0.4633, "step": 7536, "task_loss": 0.2395503968000412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22952744364738464, "epoch": 6.37, "learning_rate": 1.8144547759932377e-05, "loss": 0.4032, "step": 7537, "task_loss": 0.23318517208099365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47617363929748535, "epoch": 6.37, "learning_rate": 1.8140321217244294e-05, "loss": 0.4404, "step": 7538, "task_loss": 0.902389645576477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34737879037857056, "epoch": 6.37, "learning_rate": 1.8136094674556213e-05, "loss": 0.492, "step": 7539, "task_loss": 0.842831552028656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39803364872932434, "epoch": 6.37, "learning_rate": 1.8131868131868133e-05, "loss": 0.3803, "step": 7540, "task_loss": 0.3733649253845215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2669640779495239, "epoch": 6.37, "learning_rate": 1.8127641589180053e-05, "loss": 0.4487, "step": 7541, "task_loss": 0.09107401967048645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3468136787414551, "epoch": 6.38, "learning_rate": 1.812341504649197e-05, "loss": 0.3951, "step": 7542, "task_loss": 0.24380576610565186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35958659648895264, "epoch": 6.38, "learning_rate": 1.811918850380389e-05, "loss": 0.4175, "step": 7543, "task_loss": 0.4848724901676178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4767422676086426, "epoch": 6.38, "learning_rate": 1.811496196111581e-05, "loss": 0.4568, "step": 7544, "task_loss": 1.3296947479248047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30554869771003723, "epoch": 6.38, "learning_rate": 1.811073541842773e-05, "loss": 0.4104, "step": 7545, "task_loss": 0.873447597026825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27128180861473083, "epoch": 6.38, "learning_rate": 1.8106508875739645e-05, "loss": 0.3371, "step": 7546, "task_loss": 0.23995764553546906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30541348457336426, "epoch": 6.38, "learning_rate": 1.8102282333051564e-05, "loss": 0.3588, "step": 7547, "task_loss": 1.0768522024154663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7742637991905212, "epoch": 6.38, "learning_rate": 1.8098055790363484e-05, "loss": 0.5042, "step": 7548, "task_loss": 0.9513492584228516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6447891592979431, "epoch": 6.38, "learning_rate": 1.8093829247675404e-05, "loss": 0.471, "step": 7549, "task_loss": 0.29645898938179016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23384442925453186, "epoch": 6.38, "learning_rate": 1.808960270498732e-05, "loss": 0.3213, "step": 7550, "task_loss": 0.07928332686424255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46045786142349243, "epoch": 6.38, "learning_rate": 1.808537616229924e-05, "loss": 0.4692, "step": 7551, "task_loss": 1.2746710777282715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47530269622802734, "epoch": 6.38, "learning_rate": 1.808114961961116e-05, "loss": 0.3891, "step": 7552, "task_loss": 0.2463846504688263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3869553804397583, "epoch": 6.38, "learning_rate": 1.8076923076923076e-05, "loss": 0.3293, "step": 7553, "task_loss": 1.1909068822860718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25528866052627563, "epoch": 6.39, "learning_rate": 1.8072696534235e-05, "loss": 0.4568, "step": 7554, "task_loss": 1.2912665605545044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3567180633544922, "epoch": 6.39, "learning_rate": 1.8068469991546916e-05, "loss": 0.4169, "step": 7555, "task_loss": 0.6024770140647888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4185871183872223, "epoch": 6.39, "learning_rate": 1.8064243448858832e-05, "loss": 0.4757, "step": 7556, "task_loss": 0.3790021538734436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6990261673927307, "epoch": 6.39, "learning_rate": 1.8060016906170755e-05, "loss": 0.6151, "step": 7557, "task_loss": 0.6352113485336304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41373658180236816, "epoch": 6.39, "learning_rate": 1.805579036348267e-05, "loss": 0.4275, "step": 7558, "task_loss": 0.8403230905532837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25877469778060913, "epoch": 6.39, "learning_rate": 1.805156382079459e-05, "loss": 0.413, "step": 7559, "task_loss": 0.801657497882843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.670606255531311, "epoch": 6.39, "learning_rate": 1.804733727810651e-05, "loss": 0.4919, "step": 7560, "task_loss": 0.5019938945770264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5482395887374878, "epoch": 6.39, "learning_rate": 1.8043110735418427e-05, "loss": 0.4876, "step": 7561, "task_loss": 1.5432777404785156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45872238278388977, "epoch": 6.39, "learning_rate": 1.8038884192730347e-05, "loss": 0.3391, "step": 7562, "task_loss": 0.7343668937683105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21897876262664795, "epoch": 6.39, "learning_rate": 1.8034657650042267e-05, "loss": 0.5076, "step": 7563, "task_loss": 0.18497462570667267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6142939925193787, "epoch": 6.39, "learning_rate": 1.8030431107354183e-05, "loss": 0.5886, "step": 7564, "task_loss": 0.24114851653575897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5178631544113159, "epoch": 6.39, "learning_rate": 1.8026204564666106e-05, "loss": 0.4778, "step": 7565, "task_loss": 0.7954056262969971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4254770874977112, "epoch": 6.4, "learning_rate": 1.8021978021978023e-05, "loss": 0.5199, "step": 7566, "task_loss": 0.8415508270263672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41067150235176086, "epoch": 6.4, "learning_rate": 1.801775147928994e-05, "loss": 0.375, "step": 7567, "task_loss": 0.77430659532547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5092525482177734, "epoch": 6.4, "learning_rate": 1.8013524936601862e-05, "loss": 0.4736, "step": 7568, "task_loss": 0.7892408967018127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3329617381095886, "epoch": 6.4, "learning_rate": 1.800929839391378e-05, "loss": 0.4791, "step": 7569, "task_loss": 0.0758054181933403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2235378623008728, "epoch": 6.4, "learning_rate": 1.8005071851225698e-05, "loss": 0.4112, "step": 7570, "task_loss": 0.9776051044464111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42747461795806885, "epoch": 6.4, "learning_rate": 1.8000845308537618e-05, "loss": 0.4326, "step": 7571, "task_loss": 0.1078256219625473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6441366076469421, "epoch": 6.4, "learning_rate": 1.7996618765849534e-05, "loss": 0.475, "step": 7572, "task_loss": 1.3679238557815552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3199275732040405, "epoch": 6.4, "learning_rate": 1.7992392223161454e-05, "loss": 0.5162, "step": 7573, "task_loss": 0.7930371165275574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4770206809043884, "epoch": 6.4, "learning_rate": 1.7988165680473374e-05, "loss": 0.5258, "step": 7574, "task_loss": 1.112716555595398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5162943005561829, "epoch": 6.4, "learning_rate": 1.798393913778529e-05, "loss": 0.4992, "step": 7575, "task_loss": 1.3846509456634521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.660395085811615, "epoch": 6.4, "learning_rate": 1.7979712595097213e-05, "loss": 0.51, "step": 7576, "task_loss": 0.30320313572883606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3691035211086273, "epoch": 6.4, "learning_rate": 1.797548605240913e-05, "loss": 0.3764, "step": 7577, "task_loss": 0.11380445212125778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2644013464450836, "epoch": 6.41, "learning_rate": 1.797125950972105e-05, "loss": 0.4863, "step": 7578, "task_loss": 0.2990926504135132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35361629724502563, "epoch": 6.41, "learning_rate": 1.796703296703297e-05, "loss": 0.4292, "step": 7579, "task_loss": 0.6158581972122192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5587254762649536, "epoch": 6.41, "learning_rate": 1.7962806424344885e-05, "loss": 0.4575, "step": 7580, "task_loss": 0.30703744292259216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6294219493865967, "epoch": 6.41, "learning_rate": 1.7958579881656805e-05, "loss": 0.4484, "step": 7581, "task_loss": 0.9403088688850403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.503575325012207, "epoch": 6.41, "learning_rate": 1.7954353338968725e-05, "loss": 0.4307, "step": 7582, "task_loss": 0.2523866593837738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4844515025615692, "epoch": 6.41, "learning_rate": 1.7950126796280645e-05, "loss": 0.4503, "step": 7583, "task_loss": 0.757355272769928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1727587729692459, "epoch": 6.41, "learning_rate": 1.794590025359256e-05, "loss": 0.3645, "step": 7584, "task_loss": 0.3840113878250122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25857457518577576, "epoch": 6.41, "learning_rate": 1.794167371090448e-05, "loss": 0.3318, "step": 7585, "task_loss": 0.6746025681495667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7401986122131348, "epoch": 6.41, "learning_rate": 1.79374471682164e-05, "loss": 0.5505, "step": 7586, "task_loss": 1.4082059860229492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5529409050941467, "epoch": 6.41, "learning_rate": 1.793322062552832e-05, "loss": 0.4898, "step": 7587, "task_loss": 1.2931978702545166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3298785984516144, "epoch": 6.41, "learning_rate": 1.7928994082840236e-05, "loss": 0.3668, "step": 7588, "task_loss": 0.46625396609306335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39309418201446533, "epoch": 6.41, "learning_rate": 1.7924767540152156e-05, "loss": 0.5826, "step": 7589, "task_loss": 1.2388874292373657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47886645793914795, "epoch": 6.42, "learning_rate": 1.7920540997464076e-05, "loss": 0.3442, "step": 7590, "task_loss": 0.1339566707611084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4233910143375397, "epoch": 6.42, "learning_rate": 1.7916314454775996e-05, "loss": 0.418, "step": 7591, "task_loss": 0.7103968262672424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23849478363990784, "epoch": 6.42, "learning_rate": 1.7912087912087912e-05, "loss": 0.4414, "step": 7592, "task_loss": 0.18312296271324158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5746069550514221, "epoch": 6.42, "learning_rate": 1.7907861369399832e-05, "loss": 0.4864, "step": 7593, "task_loss": 1.2621407508850098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49882417917251587, "epoch": 6.42, "learning_rate": 1.790363482671175e-05, "loss": 0.4938, "step": 7594, "task_loss": 0.4164222776889801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5298264622688293, "epoch": 6.42, "learning_rate": 1.7899408284023668e-05, "loss": 0.406, "step": 7595, "task_loss": 0.9813674688339233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40544670820236206, "epoch": 6.42, "learning_rate": 1.7895181741335588e-05, "loss": 0.4754, "step": 7596, "task_loss": 0.13477036356925964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30339473485946655, "epoch": 6.42, "learning_rate": 1.7890955198647507e-05, "loss": 0.2934, "step": 7597, "task_loss": 0.26246318221092224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5171187520027161, "epoch": 6.42, "learning_rate": 1.7886728655959427e-05, "loss": 0.3553, "step": 7598, "task_loss": 0.2201736569404602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3105328381061554, "epoch": 6.42, "learning_rate": 1.7882502113271347e-05, "loss": 0.4203, "step": 7599, "task_loss": 1.019685983657837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7692074775695801, "epoch": 6.42, "learning_rate": 1.7878275570583263e-05, "loss": 0.4443, "step": 7600, "task_loss": 0.588320255279541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44372984766960144, "epoch": 6.42, "learning_rate": 1.7874049027895183e-05, "loss": 0.2985, "step": 7601, "task_loss": 1.1915132999420166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30329430103302, "epoch": 6.43, "learning_rate": 1.7869822485207103e-05, "loss": 0.264, "step": 7602, "task_loss": 0.18520188331604004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.576276421546936, "epoch": 6.43, "learning_rate": 1.786559594251902e-05, "loss": 0.5265, "step": 7603, "task_loss": 0.7379361391067505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26150569319725037, "epoch": 6.43, "learning_rate": 1.786136939983094e-05, "loss": 0.3709, "step": 7604, "task_loss": 0.6333028078079224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2518298029899597, "epoch": 6.43, "learning_rate": 1.785714285714286e-05, "loss": 0.3512, "step": 7605, "task_loss": 0.11739340424537659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24102556705474854, "epoch": 6.43, "learning_rate": 1.7852916314454775e-05, "loss": 0.3179, "step": 7606, "task_loss": 0.677313506603241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.376369833946228, "epoch": 6.43, "learning_rate": 1.7848689771766698e-05, "loss": 0.3656, "step": 7607, "task_loss": 0.41247788071632385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3990139365196228, "epoch": 6.43, "learning_rate": 1.7844463229078614e-05, "loss": 0.5136, "step": 7608, "task_loss": 1.203552007675171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35530394315719604, "epoch": 6.43, "learning_rate": 1.7840236686390534e-05, "loss": 0.4456, "step": 7609, "task_loss": 1.049601435661316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44617512822151184, "epoch": 6.43, "learning_rate": 1.7836010143702454e-05, "loss": 0.3467, "step": 7610, "task_loss": 0.31806641817092896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49190235137939453, "epoch": 6.43, "learning_rate": 1.783178360101437e-05, "loss": 0.3908, "step": 7611, "task_loss": 0.7891565561294556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38216811418533325, "epoch": 6.43, "learning_rate": 1.782755705832629e-05, "loss": 0.3783, "step": 7612, "task_loss": 0.45590490102767944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43023359775543213, "epoch": 6.44, "learning_rate": 1.782333051563821e-05, "loss": 0.451, "step": 7613, "task_loss": 0.3245699107646942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5369499921798706, "epoch": 6.44, "learning_rate": 1.7819103972950126e-05, "loss": 0.5567, "step": 7614, "task_loss": 1.775534749031067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6040256023406982, "epoch": 6.44, "learning_rate": 1.781487743026205e-05, "loss": 0.5561, "step": 7615, "task_loss": 0.619125247001648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33821651339530945, "epoch": 6.44, "learning_rate": 1.7810650887573965e-05, "loss": 0.4678, "step": 7616, "task_loss": 0.9404986500740051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5068166255950928, "epoch": 6.44, "learning_rate": 1.7806424344885882e-05, "loss": 0.4455, "step": 7617, "task_loss": 0.20557530224323273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.646491527557373, "epoch": 6.44, "learning_rate": 1.7802197802197805e-05, "loss": 0.4895, "step": 7618, "task_loss": 0.8594582080841064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39502227306365967, "epoch": 6.44, "learning_rate": 1.779797125950972e-05, "loss": 0.4957, "step": 7619, "task_loss": 1.2211828231811523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35524773597717285, "epoch": 6.44, "learning_rate": 1.779374471682164e-05, "loss": 0.423, "step": 7620, "task_loss": 0.5577415823936462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3755262494087219, "epoch": 6.44, "learning_rate": 1.778951817413356e-05, "loss": 0.4732, "step": 7621, "task_loss": 0.07496852427721024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4170519709587097, "epoch": 6.44, "learning_rate": 1.7785291631445477e-05, "loss": 0.4497, "step": 7622, "task_loss": 0.8204951286315918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48731687664985657, "epoch": 6.44, "learning_rate": 1.7781065088757397e-05, "loss": 0.4103, "step": 7623, "task_loss": 0.37467193603515625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6175440549850464, "epoch": 6.44, "learning_rate": 1.7776838546069317e-05, "loss": 0.3962, "step": 7624, "task_loss": 0.7171947956085205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44085437059402466, "epoch": 6.45, "learning_rate": 1.7772612003381233e-05, "loss": 0.4773, "step": 7625, "task_loss": 0.34319713711738586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2469516098499298, "epoch": 6.45, "learning_rate": 1.7768385460693153e-05, "loss": 0.3655, "step": 7626, "task_loss": 0.17120546102523804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.558559775352478, "epoch": 6.45, "learning_rate": 1.7764158918005072e-05, "loss": 0.5575, "step": 7627, "task_loss": 0.863972544670105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4851981997489929, "epoch": 6.45, "learning_rate": 1.7759932375316992e-05, "loss": 0.5302, "step": 7628, "task_loss": 0.6844739317893982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42256858944892883, "epoch": 6.45, "learning_rate": 1.7755705832628912e-05, "loss": 0.3199, "step": 7629, "task_loss": 0.577381432056427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7095748782157898, "epoch": 6.45, "learning_rate": 1.7751479289940828e-05, "loss": 0.4768, "step": 7630, "task_loss": 0.9156450629234314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6058624982833862, "epoch": 6.45, "learning_rate": 1.7747252747252748e-05, "loss": 0.6172, "step": 7631, "task_loss": 0.04020536318421364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5482578277587891, "epoch": 6.45, "learning_rate": 1.7743026204564668e-05, "loss": 0.5922, "step": 7632, "task_loss": 1.8229844570159912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6664584875106812, "epoch": 6.45, "learning_rate": 1.7738799661876584e-05, "loss": 0.5746, "step": 7633, "task_loss": 0.5663739442825317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3898431360721588, "epoch": 6.45, "learning_rate": 1.7734573119188504e-05, "loss": 0.383, "step": 7634, "task_loss": 0.9622520804405212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36202654242515564, "epoch": 6.45, "learning_rate": 1.7730346576500424e-05, "loss": 0.415, "step": 7635, "task_loss": 0.5289613604545593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34619855880737305, "epoch": 6.45, "learning_rate": 1.7726120033812343e-05, "loss": 0.3568, "step": 7636, "task_loss": 0.15002316236495972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3086897134780884, "epoch": 6.46, "learning_rate": 1.772189349112426e-05, "loss": 0.5307, "step": 7637, "task_loss": 0.7400524020195007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39203473925590515, "epoch": 6.46, "learning_rate": 1.771766694843618e-05, "loss": 0.4005, "step": 7638, "task_loss": 1.561889886856079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5477657914161682, "epoch": 6.46, "learning_rate": 1.77134404057481e-05, "loss": 0.7463, "step": 7639, "task_loss": 0.6657342314720154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5110782384872437, "epoch": 6.46, "learning_rate": 1.770921386306002e-05, "loss": 0.4163, "step": 7640, "task_loss": 0.455340176820755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31109434366226196, "epoch": 6.46, "learning_rate": 1.770498732037194e-05, "loss": 0.4609, "step": 7641, "task_loss": 0.5676177144050598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48154306411743164, "epoch": 6.46, "learning_rate": 1.7700760777683855e-05, "loss": 0.4617, "step": 7642, "task_loss": 0.38741618394851685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5222729444503784, "epoch": 6.46, "learning_rate": 1.7696534234995775e-05, "loss": 0.4583, "step": 7643, "task_loss": 1.2537728548049927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22973501682281494, "epoch": 6.46, "learning_rate": 1.7692307692307694e-05, "loss": 0.3225, "step": 7644, "task_loss": 0.2684953212738037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28304338455200195, "epoch": 6.46, "learning_rate": 1.768808114961961e-05, "loss": 0.3579, "step": 7645, "task_loss": 0.5095087289810181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5840449333190918, "epoch": 6.46, "learning_rate": 1.768385460693153e-05, "loss": 0.4825, "step": 7646, "task_loss": 0.3429000675678253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26519423723220825, "epoch": 6.46, "learning_rate": 1.767962806424345e-05, "loss": 0.2672, "step": 7647, "task_loss": 0.21497784554958344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37983691692352295, "epoch": 6.46, "learning_rate": 1.7675401521555367e-05, "loss": 0.3075, "step": 7648, "task_loss": 1.3823068141937256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3351721167564392, "epoch": 6.47, "learning_rate": 1.767117497886729e-05, "loss": 0.3132, "step": 7649, "task_loss": 0.2256188541650772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7568562030792236, "epoch": 6.47, "learning_rate": 1.7666948436179206e-05, "loss": 0.5098, "step": 7650, "task_loss": 0.42638882994651794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24094784259796143, "epoch": 6.47, "learning_rate": 1.7662721893491126e-05, "loss": 0.3808, "step": 7651, "task_loss": 0.643253743648529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2730315923690796, "epoch": 6.47, "learning_rate": 1.7658495350803046e-05, "loss": 0.4613, "step": 7652, "task_loss": 0.6675654053688049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4155542254447937, "epoch": 6.47, "learning_rate": 1.7654268808114962e-05, "loss": 0.5118, "step": 7653, "task_loss": 0.30436116456985474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3908953070640564, "epoch": 6.47, "learning_rate": 1.765004226542688e-05, "loss": 0.4511, "step": 7654, "task_loss": 0.170237734913826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24735316634178162, "epoch": 6.47, "learning_rate": 1.76458157227388e-05, "loss": 0.273, "step": 7655, "task_loss": 0.17232540249824524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40924322605133057, "epoch": 6.47, "learning_rate": 1.7641589180050718e-05, "loss": 0.3648, "step": 7656, "task_loss": 0.42511653900146484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4168434739112854, "epoch": 6.47, "learning_rate": 1.763736263736264e-05, "loss": 0.379, "step": 7657, "task_loss": 0.09970150142908096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6869746446609497, "epoch": 6.47, "learning_rate": 1.7633136094674557e-05, "loss": 0.4418, "step": 7658, "task_loss": 0.8750053644180298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3677084445953369, "epoch": 6.47, "learning_rate": 1.7628909551986474e-05, "loss": 0.3885, "step": 7659, "task_loss": 0.2624224126338959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36935144662857056, "epoch": 6.47, "learning_rate": 1.7624683009298397e-05, "loss": 0.3242, "step": 7660, "task_loss": 0.11176121979951859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17497488856315613, "epoch": 6.48, "learning_rate": 1.7620456466610313e-05, "loss": 0.3569, "step": 7661, "task_loss": 0.4668859541416168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25344911217689514, "epoch": 6.48, "learning_rate": 1.7616229923922233e-05, "loss": 0.4445, "step": 7662, "task_loss": 0.7531964182853699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29672592878341675, "epoch": 6.48, "learning_rate": 1.7612003381234152e-05, "loss": 0.468, "step": 7663, "task_loss": 0.15726575255393982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5118647217750549, "epoch": 6.48, "learning_rate": 1.760777683854607e-05, "loss": 0.5952, "step": 7664, "task_loss": 0.8347459435462952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46826356649398804, "epoch": 6.48, "learning_rate": 1.760355029585799e-05, "loss": 0.4859, "step": 7665, "task_loss": 1.637624979019165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5311981439590454, "epoch": 6.48, "learning_rate": 1.759932375316991e-05, "loss": 0.5427, "step": 7666, "task_loss": 0.8321746587753296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3801642954349518, "epoch": 6.48, "learning_rate": 1.7595097210481825e-05, "loss": 0.3187, "step": 7667, "task_loss": 0.5472202897071838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48967641592025757, "epoch": 6.48, "learning_rate": 1.7590870667793748e-05, "loss": 0.3944, "step": 7668, "task_loss": 0.5914241075515747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3747219145298004, "epoch": 6.48, "learning_rate": 1.7586644125105664e-05, "loss": 0.3464, "step": 7669, "task_loss": 0.6315202116966248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3646041452884674, "epoch": 6.48, "learning_rate": 1.7582417582417584e-05, "loss": 0.3837, "step": 7670, "task_loss": 1.2037979364395142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30353668332099915, "epoch": 6.48, "learning_rate": 1.7578191039729504e-05, "loss": 0.4312, "step": 7671, "task_loss": 0.7560117840766907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5347833633422852, "epoch": 6.48, "learning_rate": 1.757396449704142e-05, "loss": 0.4328, "step": 7672, "task_loss": 0.527636706829071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33705028891563416, "epoch": 6.49, "learning_rate": 1.756973795435334e-05, "loss": 0.3887, "step": 7673, "task_loss": 0.24453981220722198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.374274343252182, "epoch": 6.49, "learning_rate": 1.756551141166526e-05, "loss": 0.4261, "step": 7674, "task_loss": 0.3021724820137024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4270437955856323, "epoch": 6.49, "learning_rate": 1.7561284868977176e-05, "loss": 0.3873, "step": 7675, "task_loss": 0.13286586105823517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44173240661621094, "epoch": 6.49, "learning_rate": 1.7557058326289096e-05, "loss": 0.3775, "step": 7676, "task_loss": 0.5347998738288879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2840273380279541, "epoch": 6.49, "learning_rate": 1.7552831783601015e-05, "loss": 0.373, "step": 7677, "task_loss": 0.10822489857673645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4916588068008423, "epoch": 6.49, "learning_rate": 1.7548605240912935e-05, "loss": 0.4731, "step": 7678, "task_loss": 0.4730340838432312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39222168922424316, "epoch": 6.49, "learning_rate": 1.7544378698224855e-05, "loss": 0.5038, "step": 7679, "task_loss": 0.4105307161808014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3812994360923767, "epoch": 6.49, "learning_rate": 1.754015215553677e-05, "loss": 0.6001, "step": 7680, "task_loss": 1.0733082294464111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23272617161273956, "epoch": 6.49, "learning_rate": 1.753592561284869e-05, "loss": 0.3328, "step": 7681, "task_loss": 0.16583546996116638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34671711921691895, "epoch": 6.49, "learning_rate": 1.753169907016061e-05, "loss": 0.3929, "step": 7682, "task_loss": 0.0851937010884285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30802252888679504, "epoch": 6.49, "learning_rate": 1.7527472527472527e-05, "loss": 0.4577, "step": 7683, "task_loss": 0.028649890795350075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27338820695877075, "epoch": 6.5, "learning_rate": 1.7523245984784447e-05, "loss": 0.5321, "step": 7684, "task_loss": 0.3567676842212677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33301764726638794, "epoch": 6.5, "learning_rate": 1.7519019442096366e-05, "loss": 0.366, "step": 7685, "task_loss": 0.8334814310073853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2736169993877411, "epoch": 6.5, "learning_rate": 1.7514792899408286e-05, "loss": 0.329, "step": 7686, "task_loss": 0.6179097294807434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4419115483760834, "epoch": 6.5, "learning_rate": 1.7510566356720203e-05, "loss": 0.4189, "step": 7687, "task_loss": 1.06377112865448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38894888758659363, "epoch": 6.5, "learning_rate": 1.7506339814032122e-05, "loss": 0.4029, "step": 7688, "task_loss": 0.5451747179031372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38339364528656006, "epoch": 6.5, "learning_rate": 1.7502113271344042e-05, "loss": 0.4093, "step": 7689, "task_loss": 0.7164480090141296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9603133201599121, "epoch": 6.5, "learning_rate": 1.749788672865596e-05, "loss": 0.5462, "step": 7690, "task_loss": 1.6306289434432983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6875299215316772, "epoch": 6.5, "learning_rate": 1.7493660185967878e-05, "loss": 0.5243, "step": 7691, "task_loss": 0.9978961944580078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2090688794851303, "epoch": 6.5, "learning_rate": 1.7489433643279798e-05, "loss": 0.3599, "step": 7692, "task_loss": 0.08115692436695099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.434048593044281, "epoch": 6.5, "learning_rate": 1.7485207100591718e-05, "loss": 0.3888, "step": 7693, "task_loss": 0.5820990800857544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2986835241317749, "epoch": 6.5, "learning_rate": 1.7480980557903637e-05, "loss": 0.2882, "step": 7694, "task_loss": 0.27528345584869385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31939250230789185, "epoch": 6.5, "learning_rate": 1.7476754015215554e-05, "loss": 0.4142, "step": 7695, "task_loss": 0.2997313141822815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.52631014585495, "epoch": 6.51, "learning_rate": 1.7472527472527473e-05, "loss": 0.4716, "step": 7696, "task_loss": 1.1308040618896484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3174107074737549, "epoch": 6.51, "learning_rate": 1.7468300929839393e-05, "loss": 0.3862, "step": 7697, "task_loss": 0.9476327896118164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.548916220664978, "epoch": 6.51, "learning_rate": 1.746407438715131e-05, "loss": 0.3609, "step": 7698, "task_loss": 0.5129746198654175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3017251789569855, "epoch": 6.51, "learning_rate": 1.7459847844463233e-05, "loss": 0.5279, "step": 7699, "task_loss": 0.21507754921913147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5731245279312134, "epoch": 6.51, "learning_rate": 1.745562130177515e-05, "loss": 0.5437, "step": 7700, "task_loss": 0.8468421101570129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.548230767250061, "epoch": 6.51, "learning_rate": 1.7451394759087065e-05, "loss": 0.52, "step": 7701, "task_loss": 0.34524816274642944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2860320508480072, "epoch": 6.51, "learning_rate": 1.744716821639899e-05, "loss": 0.4362, "step": 7702, "task_loss": 0.32338404655456543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35994386672973633, "epoch": 6.51, "learning_rate": 1.7442941673710905e-05, "loss": 0.3644, "step": 7703, "task_loss": 0.6820021271705627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2620415687561035, "epoch": 6.51, "learning_rate": 1.7438715131022825e-05, "loss": 0.4389, "step": 7704, "task_loss": 0.46701785922050476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46705323457717896, "epoch": 6.51, "learning_rate": 1.7434488588334744e-05, "loss": 0.4018, "step": 7705, "task_loss": 0.3430244028568268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4521525502204895, "epoch": 6.51, "learning_rate": 1.743026204564666e-05, "loss": 0.3825, "step": 7706, "task_loss": 0.44167059659957886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33179032802581787, "epoch": 6.51, "learning_rate": 1.742603550295858e-05, "loss": 0.4463, "step": 7707, "task_loss": 0.832005500793457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45206132531166077, "epoch": 6.52, "learning_rate": 1.74218089602705e-05, "loss": 0.3853, "step": 7708, "task_loss": 1.3893319368362427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43434131145477295, "epoch": 6.52, "learning_rate": 1.7417582417582416e-05, "loss": 0.4519, "step": 7709, "task_loss": 0.6699163317680359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5219091773033142, "epoch": 6.52, "learning_rate": 1.741335587489434e-05, "loss": 0.4904, "step": 7710, "task_loss": 0.7903644442558289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5327441096305847, "epoch": 6.52, "learning_rate": 1.7409129332206256e-05, "loss": 0.6486, "step": 7711, "task_loss": 0.20667216181755066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.343507319688797, "epoch": 6.52, "learning_rate": 1.7404902789518172e-05, "loss": 0.389, "step": 7712, "task_loss": 0.9039720296859741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3960939943790436, "epoch": 6.52, "learning_rate": 1.7400676246830095e-05, "loss": 0.47, "step": 7713, "task_loss": 0.27050456404685974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3591465353965759, "epoch": 6.52, "learning_rate": 1.7396449704142012e-05, "loss": 0.4253, "step": 7714, "task_loss": 0.5246472954750061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.284004271030426, "epoch": 6.52, "learning_rate": 1.739222316145393e-05, "loss": 0.4174, "step": 7715, "task_loss": 0.8015879392623901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2273007035255432, "epoch": 6.52, "learning_rate": 1.738799661876585e-05, "loss": 0.3425, "step": 7716, "task_loss": 0.40578991174697876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3479277789592743, "epoch": 6.52, "learning_rate": 1.7383770076077768e-05, "loss": 0.4002, "step": 7717, "task_loss": 0.6750699877738953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4607001841068268, "epoch": 6.52, "learning_rate": 1.7379543533389687e-05, "loss": 0.5117, "step": 7718, "task_loss": 0.6770851612091064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.315701961517334, "epoch": 6.52, "learning_rate": 1.7375316990701607e-05, "loss": 0.4307, "step": 7719, "task_loss": 0.7415759563446045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4673118591308594, "epoch": 6.53, "learning_rate": 1.7371090448013523e-05, "loss": 0.4934, "step": 7720, "task_loss": 0.9815607666969299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.584915041923523, "epoch": 6.53, "learning_rate": 1.7366863905325447e-05, "loss": 0.3955, "step": 7721, "task_loss": 0.5981960296630859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4899916648864746, "epoch": 6.53, "learning_rate": 1.7362637362637363e-05, "loss": 0.5718, "step": 7722, "task_loss": 0.8930051922798157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.413751482963562, "epoch": 6.53, "learning_rate": 1.7358410819949283e-05, "loss": 0.491, "step": 7723, "task_loss": 0.29914146661758423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4485185146331787, "epoch": 6.53, "learning_rate": 1.7354184277261202e-05, "loss": 0.5704, "step": 7724, "task_loss": 0.36231741309165955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4056541621685028, "epoch": 6.53, "learning_rate": 1.734995773457312e-05, "loss": 0.406, "step": 7725, "task_loss": 1.0025750398635864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3242490887641907, "epoch": 6.53, "learning_rate": 1.734573119188504e-05, "loss": 0.445, "step": 7726, "task_loss": 0.7842551469802856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44807493686676025, "epoch": 6.53, "learning_rate": 1.7341504649196958e-05, "loss": 0.3608, "step": 7727, "task_loss": 0.23884382843971252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21364375948905945, "epoch": 6.53, "learning_rate": 1.7337278106508875e-05, "loss": 0.3179, "step": 7728, "task_loss": 0.02064407989382744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3939073085784912, "epoch": 6.53, "learning_rate": 1.7333051563820794e-05, "loss": 0.363, "step": 7729, "task_loss": 0.5518420338630676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33679160475730896, "epoch": 6.53, "learning_rate": 1.7328825021132714e-05, "loss": 0.3854, "step": 7730, "task_loss": 0.480367511510849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44632411003112793, "epoch": 6.53, "learning_rate": 1.7324598478444634e-05, "loss": 0.4849, "step": 7731, "task_loss": 0.3868582546710968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3780495524406433, "epoch": 6.54, "learning_rate": 1.7320371935756553e-05, "loss": 0.3508, "step": 7732, "task_loss": 0.9864240288734436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34663525223731995, "epoch": 6.54, "learning_rate": 1.731614539306847e-05, "loss": 0.3621, "step": 7733, "task_loss": 0.609247088432312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4681684374809265, "epoch": 6.54, "learning_rate": 1.731191885038039e-05, "loss": 0.4869, "step": 7734, "task_loss": 0.5776069164276123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35782939195632935, "epoch": 6.54, "learning_rate": 1.730769230769231e-05, "loss": 0.348, "step": 7735, "task_loss": 0.37651339173316956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3047736585140228, "epoch": 6.54, "learning_rate": 1.730346576500423e-05, "loss": 0.3089, "step": 7736, "task_loss": 1.1231772899627686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6444770097732544, "epoch": 6.54, "learning_rate": 1.7299239222316145e-05, "loss": 0.4698, "step": 7737, "task_loss": 1.2928693294525146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44074711203575134, "epoch": 6.54, "learning_rate": 1.7295012679628065e-05, "loss": 0.368, "step": 7738, "task_loss": 0.5922412276268005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4281979203224182, "epoch": 6.54, "learning_rate": 1.7290786136939985e-05, "loss": 0.3498, "step": 7739, "task_loss": 0.41255372762680054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21640227735042572, "epoch": 6.54, "learning_rate": 1.72865595942519e-05, "loss": 0.3073, "step": 7740, "task_loss": 0.42043188214302063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38790735602378845, "epoch": 6.54, "learning_rate": 1.728233305156382e-05, "loss": 0.4182, "step": 7741, "task_loss": 0.4633443057537079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24173420667648315, "epoch": 6.54, "learning_rate": 1.727810650887574e-05, "loss": 0.2969, "step": 7742, "task_loss": 0.166888028383255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5460012555122375, "epoch": 6.54, "learning_rate": 1.727387996618766e-05, "loss": 0.3732, "step": 7743, "task_loss": 0.6136801838874817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5152308940887451, "epoch": 6.55, "learning_rate": 1.726965342349958e-05, "loss": 0.4624, "step": 7744, "task_loss": 0.5298154950141907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3607529401779175, "epoch": 6.55, "learning_rate": 1.7265426880811497e-05, "loss": 0.4467, "step": 7745, "task_loss": 0.619924783706665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4113730788230896, "epoch": 6.55, "learning_rate": 1.7261200338123416e-05, "loss": 0.5192, "step": 7746, "task_loss": 0.19888818264007568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8641488552093506, "epoch": 6.55, "learning_rate": 1.7256973795435336e-05, "loss": 0.6035, "step": 7747, "task_loss": 0.9087063074111938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38239723443984985, "epoch": 6.55, "learning_rate": 1.7252747252747252e-05, "loss": 0.3608, "step": 7748, "task_loss": 0.07301051169633865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6883953809738159, "epoch": 6.55, "learning_rate": 1.7248520710059172e-05, "loss": 0.633, "step": 7749, "task_loss": 1.3126689195632935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33494848012924194, "epoch": 6.55, "learning_rate": 1.7244294167371092e-05, "loss": 0.4549, "step": 7750, "task_loss": 0.700218141078949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3619251847267151, "epoch": 6.55, "learning_rate": 1.7240067624683008e-05, "loss": 0.4097, "step": 7751, "task_loss": 0.2858006954193115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37588953971862793, "epoch": 6.55, "learning_rate": 1.723584108199493e-05, "loss": 0.4991, "step": 7752, "task_loss": 0.7141557931900024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7976861000061035, "epoch": 6.55, "learning_rate": 1.7231614539306848e-05, "loss": 0.4785, "step": 7753, "task_loss": 0.3111616373062134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4063098728656769, "epoch": 6.55, "learning_rate": 1.7227387996618764e-05, "loss": 0.5148, "step": 7754, "task_loss": 1.5374327898025513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.004528522491455, "epoch": 6.56, "learning_rate": 1.7223161453930687e-05, "loss": 0.574, "step": 7755, "task_loss": 1.3567016124725342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2833079993724823, "epoch": 6.56, "learning_rate": 1.7218934911242603e-05, "loss": 0.3788, "step": 7756, "task_loss": 0.3709709346294403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.317840576171875, "epoch": 6.56, "learning_rate": 1.7214708368554523e-05, "loss": 0.389, "step": 7757, "task_loss": 0.6490671038627625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4796914756298065, "epoch": 6.56, "learning_rate": 1.7210481825866443e-05, "loss": 0.471, "step": 7758, "task_loss": 0.71211838722229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41714051365852356, "epoch": 6.56, "learning_rate": 1.720625528317836e-05, "loss": 0.3597, "step": 7759, "task_loss": 0.5786429047584534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3412782847881317, "epoch": 6.56, "learning_rate": 1.7202028740490282e-05, "loss": 0.5038, "step": 7760, "task_loss": 0.248692125082016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6352635622024536, "epoch": 6.56, "learning_rate": 1.71978021978022e-05, "loss": 0.5103, "step": 7761, "task_loss": 0.8469898700714111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29890939593315125, "epoch": 6.56, "learning_rate": 1.7193575655114115e-05, "loss": 0.3646, "step": 7762, "task_loss": 0.5046116709709167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5203819274902344, "epoch": 6.56, "learning_rate": 1.7189349112426038e-05, "loss": 0.4202, "step": 7763, "task_loss": 0.4697435200214386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39496156573295593, "epoch": 6.56, "learning_rate": 1.7185122569737955e-05, "loss": 0.3696, "step": 7764, "task_loss": 0.6271415948867798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37355777621269226, "epoch": 6.56, "learning_rate": 1.7180896027049874e-05, "loss": 0.4259, "step": 7765, "task_loss": 0.9390147924423218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42374247312545776, "epoch": 6.56, "learning_rate": 1.7176669484361794e-05, "loss": 0.4187, "step": 7766, "task_loss": 0.29991015791893005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21618777513504028, "epoch": 6.57, "learning_rate": 1.717244294167371e-05, "loss": 0.3345, "step": 7767, "task_loss": 0.30570247769355774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.189159095287323, "epoch": 6.57, "learning_rate": 1.716821639898563e-05, "loss": 0.4148, "step": 7768, "task_loss": 0.5018526315689087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1102922335267067, "epoch": 6.57, "learning_rate": 1.716398985629755e-05, "loss": 0.2867, "step": 7769, "task_loss": 0.0073256222531199455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49159306287765503, "epoch": 6.57, "learning_rate": 1.7159763313609466e-05, "loss": 0.463, "step": 7770, "task_loss": 0.31217604875564575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6200627088546753, "epoch": 6.57, "learning_rate": 1.7155536770921386e-05, "loss": 0.5146, "step": 7771, "task_loss": 0.7909308671951294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3019719421863556, "epoch": 6.57, "learning_rate": 1.7151310228233306e-05, "loss": 0.3225, "step": 7772, "task_loss": 0.3400425612926483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6875807046890259, "epoch": 6.57, "learning_rate": 1.7147083685545225e-05, "loss": 0.494, "step": 7773, "task_loss": 1.2067588567733765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.461720734834671, "epoch": 6.57, "learning_rate": 1.7142857142857145e-05, "loss": 0.6045, "step": 7774, "task_loss": 0.6054428219795227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5062723755836487, "epoch": 6.57, "learning_rate": 1.713863060016906e-05, "loss": 0.4223, "step": 7775, "task_loss": 0.8263669013977051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31832355260849, "epoch": 6.57, "learning_rate": 1.713440405748098e-05, "loss": 0.3156, "step": 7776, "task_loss": 1.0313677787780762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4299007058143616, "epoch": 6.57, "learning_rate": 1.71301775147929e-05, "loss": 0.4353, "step": 7777, "task_loss": 0.41176527738571167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49230480194091797, "epoch": 6.57, "learning_rate": 1.7125950972104817e-05, "loss": 0.4562, "step": 7778, "task_loss": 1.2623660564422607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3376258611679077, "epoch": 6.58, "learning_rate": 1.7121724429416737e-05, "loss": 0.4789, "step": 7779, "task_loss": 0.7366155982017517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3781077265739441, "epoch": 6.58, "learning_rate": 1.7117497886728657e-05, "loss": 0.4186, "step": 7780, "task_loss": 0.7562388181686401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3058122396469116, "epoch": 6.58, "learning_rate": 1.7113271344040577e-05, "loss": 0.435, "step": 7781, "task_loss": 0.7692509889602661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26906147599220276, "epoch": 6.58, "learning_rate": 1.7109044801352493e-05, "loss": 0.3771, "step": 7782, "task_loss": 0.13587145507335663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5102202892303467, "epoch": 6.58, "learning_rate": 1.7104818258664413e-05, "loss": 0.4139, "step": 7783, "task_loss": 1.2248032093048096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49844905734062195, "epoch": 6.58, "learning_rate": 1.7100591715976332e-05, "loss": 0.4452, "step": 7784, "task_loss": 0.8754245042800903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2663879990577698, "epoch": 6.58, "learning_rate": 1.7096365173288252e-05, "loss": 0.3024, "step": 7785, "task_loss": 0.16097551584243774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3796393871307373, "epoch": 6.58, "learning_rate": 1.709213863060017e-05, "loss": 0.6043, "step": 7786, "task_loss": 1.1156126260757446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7183825969696045, "epoch": 6.58, "learning_rate": 1.7087912087912088e-05, "loss": 0.408, "step": 7787, "task_loss": 1.122050166130066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4500882625579834, "epoch": 6.58, "learning_rate": 1.7083685545224008e-05, "loss": 0.3446, "step": 7788, "task_loss": 0.4593886137008667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42194950580596924, "epoch": 6.58, "learning_rate": 1.7079459002535928e-05, "loss": 0.3573, "step": 7789, "task_loss": 0.42660364508628845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27095115184783936, "epoch": 6.58, "learning_rate": 1.7075232459847844e-05, "loss": 0.3464, "step": 7790, "task_loss": 0.20211376249790192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34839215874671936, "epoch": 6.59, "learning_rate": 1.7071005917159764e-05, "loss": 0.433, "step": 7791, "task_loss": 0.5181761980056763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6757053732872009, "epoch": 6.59, "learning_rate": 1.7066779374471684e-05, "loss": 0.4329, "step": 7792, "task_loss": 1.353811502456665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.309563547372818, "epoch": 6.59, "learning_rate": 1.70625528317836e-05, "loss": 0.4309, "step": 7793, "task_loss": 0.885120689868927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5359622836112976, "epoch": 6.59, "learning_rate": 1.7058326289095523e-05, "loss": 0.5483, "step": 7794, "task_loss": 0.6671273708343506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3897382318973541, "epoch": 6.59, "learning_rate": 1.705409974640744e-05, "loss": 0.4413, "step": 7795, "task_loss": 0.3334895074367523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32747477293014526, "epoch": 6.59, "learning_rate": 1.704987320371936e-05, "loss": 0.3572, "step": 7796, "task_loss": 0.738261342048645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3865398168563843, "epoch": 6.59, "learning_rate": 1.704564666103128e-05, "loss": 0.3786, "step": 7797, "task_loss": 0.28624165058135986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2486703097820282, "epoch": 6.59, "learning_rate": 1.7041420118343195e-05, "loss": 0.2885, "step": 7798, "task_loss": 0.4943665564060211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3966832160949707, "epoch": 6.59, "learning_rate": 1.7037193575655115e-05, "loss": 0.4402, "step": 7799, "task_loss": 0.4427167773246765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26261886954307556, "epoch": 6.59, "learning_rate": 1.7032967032967035e-05, "loss": 0.4306, "step": 7800, "task_loss": 0.13064832985401154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32483941316604614, "epoch": 6.59, "learning_rate": 1.702874049027895e-05, "loss": 0.4874, "step": 7801, "task_loss": 0.1987677365541458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2856143116950989, "epoch": 6.59, "learning_rate": 1.7024513947590874e-05, "loss": 0.3863, "step": 7802, "task_loss": 0.5596334338188171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5095064640045166, "epoch": 6.6, "learning_rate": 1.702028740490279e-05, "loss": 0.4872, "step": 7803, "task_loss": 0.8551172614097595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4662837088108063, "epoch": 6.6, "learning_rate": 1.7016060862214707e-05, "loss": 0.4829, "step": 7804, "task_loss": 0.641343891620636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3950001895427704, "epoch": 6.6, "learning_rate": 1.701183431952663e-05, "loss": 0.4731, "step": 7805, "task_loss": 0.7662146091461182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5467774271965027, "epoch": 6.6, "learning_rate": 1.7007607776838546e-05, "loss": 0.5333, "step": 7806, "task_loss": 0.9715954661369324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9037359952926636, "epoch": 6.6, "learning_rate": 1.7003381234150466e-05, "loss": 0.4514, "step": 7807, "task_loss": 1.1113052368164062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34200286865234375, "epoch": 6.6, "learning_rate": 1.6999154691462386e-05, "loss": 0.4477, "step": 7808, "task_loss": 0.601847231388092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7178307771682739, "epoch": 6.6, "learning_rate": 1.6994928148774302e-05, "loss": 0.4622, "step": 7809, "task_loss": 1.2305738925933838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49173447489738464, "epoch": 6.6, "learning_rate": 1.6990701606086222e-05, "loss": 0.5046, "step": 7810, "task_loss": 0.3537293076515198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2590474486351013, "epoch": 6.6, "learning_rate": 1.698647506339814e-05, "loss": 0.3872, "step": 7811, "task_loss": 0.9678168892860413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29836034774780273, "epoch": 6.6, "learning_rate": 1.6982248520710058e-05, "loss": 0.4852, "step": 7812, "task_loss": 0.1088179275393486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2780569791793823, "epoch": 6.6, "learning_rate": 1.697802197802198e-05, "loss": 0.442, "step": 7813, "task_loss": 0.3472524881362915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.438448965549469, "epoch": 6.6, "learning_rate": 1.6973795435333898e-05, "loss": 0.423, "step": 7814, "task_loss": 0.25880756974220276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.411201536655426, "epoch": 6.61, "learning_rate": 1.6969568892645814e-05, "loss": 0.4484, "step": 7815, "task_loss": 0.4490131139755249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26037681102752686, "epoch": 6.61, "learning_rate": 1.6965342349957737e-05, "loss": 0.375, "step": 7816, "task_loss": 0.5130388736724854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.472415566444397, "epoch": 6.61, "learning_rate": 1.6961115807269653e-05, "loss": 0.4606, "step": 7817, "task_loss": 1.0230894088745117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6055726408958435, "epoch": 6.61, "learning_rate": 1.6956889264581573e-05, "loss": 0.5151, "step": 7818, "task_loss": 0.6419128775596619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5813463926315308, "epoch": 6.61, "learning_rate": 1.6952662721893493e-05, "loss": 0.4788, "step": 7819, "task_loss": 0.9937012195587158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5667768716812134, "epoch": 6.61, "learning_rate": 1.694843617920541e-05, "loss": 0.4229, "step": 7820, "task_loss": 1.1213327646255493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3345906138420105, "epoch": 6.61, "learning_rate": 1.694420963651733e-05, "loss": 0.3897, "step": 7821, "task_loss": 1.0003970861434937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4654967188835144, "epoch": 6.61, "learning_rate": 1.693998309382925e-05, "loss": 0.505, "step": 7822, "task_loss": 0.5672613382339478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39240339398384094, "epoch": 6.61, "learning_rate": 1.693575655114117e-05, "loss": 0.5433, "step": 7823, "task_loss": 0.776161789894104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3743574023246765, "epoch": 6.61, "learning_rate": 1.6931530008453088e-05, "loss": 0.5195, "step": 7824, "task_loss": 0.7787492275238037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2515353858470917, "epoch": 6.61, "learning_rate": 1.6927303465765004e-05, "loss": 0.2995, "step": 7825, "task_loss": 0.18031682074069977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5146732330322266, "epoch": 6.61, "learning_rate": 1.6923076923076924e-05, "loss": 0.4757, "step": 7826, "task_loss": 0.9696012139320374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5856581926345825, "epoch": 6.62, "learning_rate": 1.6918850380388844e-05, "loss": 0.4807, "step": 7827, "task_loss": 1.7668358087539673 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41958820819854736, "epoch": 6.62, "learning_rate": 1.691462383770076e-05, "loss": 0.418, "step": 7828, "task_loss": 0.9741309881210327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31739336252212524, "epoch": 6.62, "learning_rate": 1.691039729501268e-05, "loss": 0.4027, "step": 7829, "task_loss": 0.6754883527755737 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27387967705726624, "epoch": 6.62, "learning_rate": 1.69061707523246e-05, "loss": 0.6698, "step": 7830, "task_loss": 0.5085774660110474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4034919738769531, "epoch": 6.62, "learning_rate": 1.690194420963652e-05, "loss": 0.4253, "step": 7831, "task_loss": 0.4528971314430237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30572280287742615, "epoch": 6.62, "learning_rate": 1.6897717666948436e-05, "loss": 0.2868, "step": 7832, "task_loss": 0.13664884865283966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2029648870229721, "epoch": 6.62, "learning_rate": 1.6893491124260356e-05, "loss": 0.4788, "step": 7833, "task_loss": 0.6416718363761902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.834963321685791, "epoch": 6.62, "learning_rate": 1.6889264581572275e-05, "loss": 0.5773, "step": 7834, "task_loss": 0.9494382739067078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2479391247034073, "epoch": 6.62, "learning_rate": 1.688503803888419e-05, "loss": 0.3864, "step": 7835, "task_loss": 0.5319033861160278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36878520250320435, "epoch": 6.62, "learning_rate": 1.688081149619611e-05, "loss": 0.5437, "step": 7836, "task_loss": 1.075903058052063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3124607801437378, "epoch": 6.62, "learning_rate": 1.687658495350803e-05, "loss": 0.4679, "step": 7837, "task_loss": 0.30980852246284485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3998446762561798, "epoch": 6.63, "learning_rate": 1.687235841081995e-05, "loss": 0.416, "step": 7838, "task_loss": 1.0167372226715088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42109233140945435, "epoch": 6.63, "learning_rate": 1.686813186813187e-05, "loss": 0.5737, "step": 7839, "task_loss": 1.477961540222168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4839189648628235, "epoch": 6.63, "learning_rate": 1.6863905325443787e-05, "loss": 0.4945, "step": 7840, "task_loss": 1.487709879875183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2645489275455475, "epoch": 6.63, "learning_rate": 1.6859678782755707e-05, "loss": 0.4852, "step": 7841, "task_loss": 0.009127922356128693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38056236505508423, "epoch": 6.63, "learning_rate": 1.6855452240067626e-05, "loss": 0.4751, "step": 7842, "task_loss": 0.35602596402168274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3730393946170807, "epoch": 6.63, "learning_rate": 1.6851225697379543e-05, "loss": 0.5508, "step": 7843, "task_loss": 0.657197117805481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3719111979007721, "epoch": 6.63, "learning_rate": 1.6846999154691463e-05, "loss": 0.3195, "step": 7844, "task_loss": 0.4179593622684479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31686052680015564, "epoch": 6.63, "learning_rate": 1.6842772612003382e-05, "loss": 0.4291, "step": 7845, "task_loss": 0.7979422807693481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6795693635940552, "epoch": 6.63, "learning_rate": 1.68385460693153e-05, "loss": 0.5175, "step": 7846, "task_loss": 1.52130925655365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37797093391418457, "epoch": 6.63, "learning_rate": 1.6834319526627222e-05, "loss": 0.4584, "step": 7847, "task_loss": 0.7377219796180725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21766868233680725, "epoch": 6.63, "learning_rate": 1.6830092983939138e-05, "loss": 0.2589, "step": 7848, "task_loss": 0.33914804458618164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2780611515045166, "epoch": 6.63, "learning_rate": 1.6825866441251058e-05, "loss": 0.4986, "step": 7849, "task_loss": 0.39501118659973145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49253904819488525, "epoch": 6.64, "learning_rate": 1.6821639898562978e-05, "loss": 0.386, "step": 7850, "task_loss": 0.9855445623397827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6423546671867371, "epoch": 6.64, "learning_rate": 1.6817413355874894e-05, "loss": 0.4266, "step": 7851, "task_loss": 0.6134361624717712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.746596097946167, "epoch": 6.64, "learning_rate": 1.6813186813186814e-05, "loss": 0.4749, "step": 7852, "task_loss": 0.6976813673973083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35743534564971924, "epoch": 6.64, "learning_rate": 1.6808960270498733e-05, "loss": 0.4166, "step": 7853, "task_loss": 0.6467171907424927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2795352041721344, "epoch": 6.64, "learning_rate": 1.680473372781065e-05, "loss": 0.5168, "step": 7854, "task_loss": 0.031177816912531853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6888682246208191, "epoch": 6.64, "learning_rate": 1.6800507185122573e-05, "loss": 0.4106, "step": 7855, "task_loss": 0.8098357319831848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41857612133026123, "epoch": 6.64, "learning_rate": 1.679628064243449e-05, "loss": 0.4888, "step": 7856, "task_loss": 0.6957071423530579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28110942244529724, "epoch": 6.64, "learning_rate": 1.6792054099746406e-05, "loss": 0.3718, "step": 7857, "task_loss": 0.4127071797847748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38301414251327515, "epoch": 6.64, "learning_rate": 1.678782755705833e-05, "loss": 0.3611, "step": 7858, "task_loss": 0.17276017367839813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44969305396080017, "epoch": 6.64, "learning_rate": 1.6783601014370245e-05, "loss": 0.4666, "step": 7859, "task_loss": 0.8074188232421875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29954028129577637, "epoch": 6.64, "learning_rate": 1.6779374471682165e-05, "loss": 0.3689, "step": 7860, "task_loss": 0.6283911466598511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28224149346351624, "epoch": 6.64, "learning_rate": 1.6775147928994085e-05, "loss": 0.3831, "step": 7861, "task_loss": 0.8562459349632263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2840210795402527, "epoch": 6.65, "learning_rate": 1.6770921386306e-05, "loss": 0.3591, "step": 7862, "task_loss": 0.35653284192085266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3328137993812561, "epoch": 6.65, "learning_rate": 1.676669484361792e-05, "loss": 0.4094, "step": 7863, "task_loss": 0.6902181506156921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27826401591300964, "epoch": 6.65, "learning_rate": 1.676246830092984e-05, "loss": 0.482, "step": 7864, "task_loss": 0.2302171289920807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6427114605903625, "epoch": 6.65, "learning_rate": 1.6758241758241757e-05, "loss": 0.5667, "step": 7865, "task_loss": 1.0369083881378174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.597230076789856, "epoch": 6.65, "learning_rate": 1.675401521555368e-05, "loss": 0.5226, "step": 7866, "task_loss": 0.4389936625957489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22895793616771698, "epoch": 6.65, "learning_rate": 1.6749788672865596e-05, "loss": 0.3498, "step": 7867, "task_loss": 0.23320049047470093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42503148317337036, "epoch": 6.65, "learning_rate": 1.6745562130177516e-05, "loss": 0.492, "step": 7868, "task_loss": 0.36693093180656433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22606365382671356, "epoch": 6.65, "learning_rate": 1.6741335587489436e-05, "loss": 0.3632, "step": 7869, "task_loss": 0.40443316102027893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3249947428703308, "epoch": 6.65, "learning_rate": 1.6737109044801352e-05, "loss": 0.4099, "step": 7870, "task_loss": 0.7696169018745422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9552532434463501, "epoch": 6.65, "learning_rate": 1.6732882502113272e-05, "loss": 0.515, "step": 7871, "task_loss": 1.6129182577133179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23829714953899384, "epoch": 6.65, "learning_rate": 1.672865595942519e-05, "loss": 0.3475, "step": 7872, "task_loss": 0.470314621925354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42258942127227783, "epoch": 6.65, "learning_rate": 1.6724429416737108e-05, "loss": 0.384, "step": 7873, "task_loss": 0.7635220289230347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2941585183143616, "epoch": 6.66, "learning_rate": 1.6720202874049028e-05, "loss": 0.3763, "step": 7874, "task_loss": 0.16405069828033447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3593031167984009, "epoch": 6.66, "learning_rate": 1.6715976331360947e-05, "loss": 0.4803, "step": 7875, "task_loss": 0.9557135105133057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3731909394264221, "epoch": 6.66, "learning_rate": 1.6711749788672867e-05, "loss": 0.4268, "step": 7876, "task_loss": 0.22565065324306488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2306399643421173, "epoch": 6.66, "learning_rate": 1.6707523245984787e-05, "loss": 0.4265, "step": 7877, "task_loss": 0.7818352580070496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2936776876449585, "epoch": 6.66, "learning_rate": 1.6703296703296703e-05, "loss": 0.299, "step": 7878, "task_loss": 0.40420129895210266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24466188251972198, "epoch": 6.66, "learning_rate": 1.6699070160608623e-05, "loss": 0.3941, "step": 7879, "task_loss": 0.1965845227241516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3178272247314453, "epoch": 6.66, "learning_rate": 1.6694843617920543e-05, "loss": 0.3404, "step": 7880, "task_loss": 0.19740581512451172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32702547311782837, "epoch": 6.66, "learning_rate": 1.6690617075232462e-05, "loss": 0.3724, "step": 7881, "task_loss": 1.2774770259857178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4005640149116516, "epoch": 6.66, "learning_rate": 1.668639053254438e-05, "loss": 0.4585, "step": 7882, "task_loss": 0.7200934290885925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4011356234550476, "epoch": 6.66, "learning_rate": 1.66821639898563e-05, "loss": 0.4334, "step": 7883, "task_loss": 0.2082877904176712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5061812400817871, "epoch": 6.66, "learning_rate": 1.6677937447168218e-05, "loss": 0.4661, "step": 7884, "task_loss": 0.7716788649559021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41362491250038147, "epoch": 6.66, "learning_rate": 1.6673710904480135e-05, "loss": 0.4154, "step": 7885, "task_loss": 0.4621753990650177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.627677321434021, "epoch": 6.67, "learning_rate": 1.6669484361792054e-05, "loss": 0.5421, "step": 7886, "task_loss": 0.7942931056022644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7152157425880432, "epoch": 6.67, "learning_rate": 1.6665257819103974e-05, "loss": 0.5423, "step": 7887, "task_loss": 1.3514974117279053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24486316740512848, "epoch": 6.67, "learning_rate": 1.6661031276415894e-05, "loss": 0.3862, "step": 7888, "task_loss": 0.4385841488838196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49971505999565125, "epoch": 6.67, "learning_rate": 1.6656804733727814e-05, "loss": 0.4765, "step": 7889, "task_loss": 1.7225147485733032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3985351324081421, "epoch": 6.67, "learning_rate": 1.665257819103973e-05, "loss": 0.3644, "step": 7890, "task_loss": 0.5793612003326416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.217386856675148, "epoch": 6.67, "learning_rate": 1.664835164835165e-05, "loss": 0.3296, "step": 7891, "task_loss": 0.04287043213844299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5360692739486694, "epoch": 6.67, "learning_rate": 1.664412510566357e-05, "loss": 0.3718, "step": 7892, "task_loss": 0.4197237193584442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27151763439178467, "epoch": 6.67, "learning_rate": 1.6639898562975486e-05, "loss": 0.422, "step": 7893, "task_loss": 0.4209800660610199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2818731367588043, "epoch": 6.67, "learning_rate": 1.6635672020287405e-05, "loss": 0.4265, "step": 7894, "task_loss": 0.6921606659889221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4401148855686188, "epoch": 6.67, "learning_rate": 1.6631445477599325e-05, "loss": 0.3912, "step": 7895, "task_loss": 0.24736544489860535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1674456000328064, "epoch": 6.67, "learning_rate": 1.662721893491124e-05, "loss": 0.3296, "step": 7896, "task_loss": 0.5265105962753296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6385089159011841, "epoch": 6.67, "learning_rate": 1.6622992392223165e-05, "loss": 0.433, "step": 7897, "task_loss": 1.0874245166778564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2909735441207886, "epoch": 6.68, "learning_rate": 1.661876584953508e-05, "loss": 0.4436, "step": 7898, "task_loss": 0.3805181086063385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4249701201915741, "epoch": 6.68, "learning_rate": 1.6614539306846997e-05, "loss": 0.377, "step": 7899, "task_loss": 0.5190927982330322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4515044689178467, "epoch": 6.68, "learning_rate": 1.661031276415892e-05, "loss": 0.5262, "step": 7900, "task_loss": 0.1972443014383316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4924963116645813, "epoch": 6.68, "learning_rate": 1.6606086221470837e-05, "loss": 0.4453, "step": 7901, "task_loss": 1.2507990598678589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4823368787765503, "epoch": 6.68, "learning_rate": 1.6601859678782757e-05, "loss": 0.4629, "step": 7902, "task_loss": 0.6411387324333191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30803802609443665, "epoch": 6.68, "learning_rate": 1.6597633136094676e-05, "loss": 0.3401, "step": 7903, "task_loss": 0.39228853583335876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36034291982650757, "epoch": 6.68, "learning_rate": 1.6593406593406593e-05, "loss": 0.466, "step": 7904, "task_loss": 1.4728156328201294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36487507820129395, "epoch": 6.68, "learning_rate": 1.6589180050718516e-05, "loss": 0.4961, "step": 7905, "task_loss": 0.1345130205154419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7971124649047852, "epoch": 6.68, "learning_rate": 1.6584953508030432e-05, "loss": 0.5819, "step": 7906, "task_loss": 0.5571340322494507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33632469177246094, "epoch": 6.68, "learning_rate": 1.658072696534235e-05, "loss": 0.3748, "step": 7907, "task_loss": 0.39115187525749207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.922744870185852, "epoch": 6.68, "learning_rate": 1.657650042265427e-05, "loss": 0.5942, "step": 7908, "task_loss": 0.861556351184845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15169547498226166, "epoch": 6.69, "learning_rate": 1.6572273879966188e-05, "loss": 0.4049, "step": 7909, "task_loss": 0.019703006371855736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47481006383895874, "epoch": 6.69, "learning_rate": 1.6568047337278108e-05, "loss": 0.3535, "step": 7910, "task_loss": 1.2536118030548096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3274134397506714, "epoch": 6.69, "learning_rate": 1.6563820794590027e-05, "loss": 0.3646, "step": 7911, "task_loss": 0.10222628712654114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25910577178001404, "epoch": 6.69, "learning_rate": 1.6559594251901944e-05, "loss": 0.4001, "step": 7912, "task_loss": 0.648970365524292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34479060769081116, "epoch": 6.69, "learning_rate": 1.6555367709213864e-05, "loss": 0.439, "step": 7913, "task_loss": 0.6855186820030212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38624656200408936, "epoch": 6.69, "learning_rate": 1.6551141166525783e-05, "loss": 0.4484, "step": 7914, "task_loss": 0.5227249264717102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6417444944381714, "epoch": 6.69, "learning_rate": 1.65469146238377e-05, "loss": 0.5027, "step": 7915, "task_loss": 1.4856117963790894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3462070822715759, "epoch": 6.69, "learning_rate": 1.654268808114962e-05, "loss": 0.3855, "step": 7916, "task_loss": 0.33778905868530273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34173285961151123, "epoch": 6.69, "learning_rate": 1.653846153846154e-05, "loss": 0.3932, "step": 7917, "task_loss": 0.35714563727378845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21421535313129425, "epoch": 6.69, "learning_rate": 1.653423499577346e-05, "loss": 0.404, "step": 7918, "task_loss": 0.03819049149751663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5638412237167358, "epoch": 6.69, "learning_rate": 1.653000845308538e-05, "loss": 0.394, "step": 7919, "task_loss": 0.46492114663124084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4318890869617462, "epoch": 6.69, "learning_rate": 1.6525781910397295e-05, "loss": 0.3902, "step": 7920, "task_loss": 0.3392890989780426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37183642387390137, "epoch": 6.7, "learning_rate": 1.6521555367709215e-05, "loss": 0.4021, "step": 7921, "task_loss": 0.48992031812667847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5553531050682068, "epoch": 6.7, "learning_rate": 1.6517328825021134e-05, "loss": 0.5349, "step": 7922, "task_loss": 0.3624607026576996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7443002462387085, "epoch": 6.7, "learning_rate": 1.651310228233305e-05, "loss": 0.4903, "step": 7923, "task_loss": 0.5412360429763794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3718640208244324, "epoch": 6.7, "learning_rate": 1.650887573964497e-05, "loss": 0.4557, "step": 7924, "task_loss": 0.5478312969207764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32824039459228516, "epoch": 6.7, "learning_rate": 1.650464919695689e-05, "loss": 0.4565, "step": 7925, "task_loss": 0.8614242672920227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3667967915534973, "epoch": 6.7, "learning_rate": 1.650042265426881e-05, "loss": 0.4552, "step": 7926, "task_loss": 0.2793425917625427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44447726011276245, "epoch": 6.7, "learning_rate": 1.6496196111580726e-05, "loss": 0.3646, "step": 7927, "task_loss": 0.7600813508033752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6184817552566528, "epoch": 6.7, "learning_rate": 1.6491969568892646e-05, "loss": 0.4924, "step": 7928, "task_loss": 0.9183597564697266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41231435537338257, "epoch": 6.7, "learning_rate": 1.6487743026204566e-05, "loss": 0.4237, "step": 7929, "task_loss": 0.3910095691680908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6822634935379028, "epoch": 6.7, "learning_rate": 1.6483516483516486e-05, "loss": 0.5559, "step": 7930, "task_loss": 0.8809622526168823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33036908507347107, "epoch": 6.7, "learning_rate": 1.6479289940828402e-05, "loss": 0.5491, "step": 7931, "task_loss": 0.13775520026683807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3999181389808655, "epoch": 6.7, "learning_rate": 1.647506339814032e-05, "loss": 0.4929, "step": 7932, "task_loss": 0.5354138612747192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.211920365691185, "epoch": 6.71, "learning_rate": 1.647083685545224e-05, "loss": 0.3587, "step": 7933, "task_loss": 0.6876026391983032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40062692761421204, "epoch": 6.71, "learning_rate": 1.646661031276416e-05, "loss": 0.4132, "step": 7934, "task_loss": 0.34042972326278687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5441950559616089, "epoch": 6.71, "learning_rate": 1.6462383770076077e-05, "loss": 0.5045, "step": 7935, "task_loss": 0.839040994644165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37070974707603455, "epoch": 6.71, "learning_rate": 1.6458157227387997e-05, "loss": 0.4938, "step": 7936, "task_loss": 0.6015868782997131 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4295511245727539, "epoch": 6.71, "learning_rate": 1.6453930684699917e-05, "loss": 0.3948, "step": 7937, "task_loss": 0.8989497423171997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3794359564781189, "epoch": 6.71, "learning_rate": 1.6449704142011833e-05, "loss": 0.3314, "step": 7938, "task_loss": 0.2520560026168823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4793817400932312, "epoch": 6.71, "learning_rate": 1.6445477599323756e-05, "loss": 0.3654, "step": 7939, "task_loss": 0.8180513381958008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4951170086860657, "epoch": 6.71, "learning_rate": 1.6441251056635673e-05, "loss": 0.4887, "step": 7940, "task_loss": 0.9944536685943604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25072160363197327, "epoch": 6.71, "learning_rate": 1.6437024513947592e-05, "loss": 0.5373, "step": 7941, "task_loss": 0.6939756870269775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24348419904708862, "epoch": 6.71, "learning_rate": 1.6432797971259512e-05, "loss": 0.2321, "step": 7942, "task_loss": 0.40479740500450134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5575716495513916, "epoch": 6.71, "learning_rate": 1.642857142857143e-05, "loss": 0.3591, "step": 7943, "task_loss": 0.5743822455406189 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5072205662727356, "epoch": 6.71, "learning_rate": 1.642434488588335e-05, "loss": 0.4241, "step": 7944, "task_loss": 0.405695378780365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2288137674331665, "epoch": 6.72, "learning_rate": 1.6420118343195268e-05, "loss": 0.3291, "step": 7945, "task_loss": 1.074151873588562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47672244906425476, "epoch": 6.72, "learning_rate": 1.6415891800507184e-05, "loss": 0.4439, "step": 7946, "task_loss": 1.2125324010849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32044488191604614, "epoch": 6.72, "learning_rate": 1.6411665257819108e-05, "loss": 0.465, "step": 7947, "task_loss": 0.4071246385574341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5282348394393921, "epoch": 6.72, "learning_rate": 1.6407438715131024e-05, "loss": 0.4771, "step": 7948, "task_loss": 1.1675145626068115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22832587361335754, "epoch": 6.72, "learning_rate": 1.640321217244294e-05, "loss": 0.2561, "step": 7949, "task_loss": 0.44228026270866394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4511672854423523, "epoch": 6.72, "learning_rate": 1.6398985629754863e-05, "loss": 0.4778, "step": 7950, "task_loss": 0.6656588912010193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6070488095283508, "epoch": 6.72, "learning_rate": 1.639475908706678e-05, "loss": 0.4524, "step": 7951, "task_loss": 0.8925662040710449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45406103134155273, "epoch": 6.72, "learning_rate": 1.63905325443787e-05, "loss": 0.3515, "step": 7952, "task_loss": 0.5755024552345276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38286399841308594, "epoch": 6.72, "learning_rate": 1.638630600169062e-05, "loss": 0.5273, "step": 7953, "task_loss": 0.2970421612262726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22014494240283966, "epoch": 6.72, "learning_rate": 1.6382079459002536e-05, "loss": 0.2855, "step": 7954, "task_loss": 0.5068712830543518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4310670495033264, "epoch": 6.72, "learning_rate": 1.6377852916314455e-05, "loss": 0.5253, "step": 7955, "task_loss": 0.8217558860778809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4381455183029175, "epoch": 6.72, "learning_rate": 1.6373626373626375e-05, "loss": 0.4016, "step": 7956, "task_loss": 0.6056846380233765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3161821663379669, "epoch": 6.73, "learning_rate": 1.636939983093829e-05, "loss": 0.5172, "step": 7957, "task_loss": 0.260733962059021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44755077362060547, "epoch": 6.73, "learning_rate": 1.6365173288250214e-05, "loss": 0.4121, "step": 7958, "task_loss": 0.9655861258506775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5423612594604492, "epoch": 6.73, "learning_rate": 1.636094674556213e-05, "loss": 0.4104, "step": 7959, "task_loss": 0.8968042135238647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3530900478363037, "epoch": 6.73, "learning_rate": 1.6356720202874047e-05, "loss": 0.412, "step": 7960, "task_loss": 0.18608194589614868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2978460192680359, "epoch": 6.73, "learning_rate": 1.635249366018597e-05, "loss": 0.3943, "step": 7961, "task_loss": 0.2696540951728821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3914908170700073, "epoch": 6.73, "learning_rate": 1.6348267117497887e-05, "loss": 0.5007, "step": 7962, "task_loss": 0.10750412940979004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.265880286693573, "epoch": 6.73, "learning_rate": 1.6344040574809806e-05, "loss": 0.3117, "step": 7963, "task_loss": 0.16568820178508759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6397131681442261, "epoch": 6.73, "learning_rate": 1.6339814032121726e-05, "loss": 0.3453, "step": 7964, "task_loss": 0.10658658295869827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2626704275608063, "epoch": 6.73, "learning_rate": 1.6335587489433643e-05, "loss": 0.4288, "step": 7965, "task_loss": 0.2992420494556427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23224788904190063, "epoch": 6.73, "learning_rate": 1.6331360946745562e-05, "loss": 0.4971, "step": 7966, "task_loss": 0.3319239914417267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4272986054420471, "epoch": 6.73, "learning_rate": 1.6327134404057482e-05, "loss": 0.3209, "step": 7967, "task_loss": 0.5666282176971436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4463226795196533, "epoch": 6.73, "learning_rate": 1.6322907861369402e-05, "loss": 0.514, "step": 7968, "task_loss": 1.488161325454712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5011719465255737, "epoch": 6.74, "learning_rate": 1.631868131868132e-05, "loss": 0.4265, "step": 7969, "task_loss": 0.24966245889663696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5733376145362854, "epoch": 6.74, "learning_rate": 1.6314454775993238e-05, "loss": 0.6238, "step": 7970, "task_loss": 1.1430233716964722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3056926131248474, "epoch": 6.74, "learning_rate": 1.6310228233305158e-05, "loss": 0.5008, "step": 7971, "task_loss": 0.3297462463378906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3262958228588104, "epoch": 6.74, "learning_rate": 1.6306001690617077e-05, "loss": 0.4637, "step": 7972, "task_loss": 0.16790935397148132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.347761869430542, "epoch": 6.74, "learning_rate": 1.6301775147928994e-05, "loss": 0.3851, "step": 7973, "task_loss": 0.8909550309181213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4962056577205658, "epoch": 6.74, "learning_rate": 1.6297548605240913e-05, "loss": 0.64, "step": 7974, "task_loss": 0.893773078918457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4327651858329773, "epoch": 6.74, "learning_rate": 1.6293322062552833e-05, "loss": 0.5309, "step": 7975, "task_loss": 0.3724449574947357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2775087356567383, "epoch": 6.74, "learning_rate": 1.6289095519864753e-05, "loss": 0.3464, "step": 7976, "task_loss": 0.48715224862098694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3297877013683319, "epoch": 6.74, "learning_rate": 1.628486897717667e-05, "loss": 0.3777, "step": 7977, "task_loss": 0.24879519641399384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4638915956020355, "epoch": 6.74, "learning_rate": 1.628064243448859e-05, "loss": 0.43, "step": 7978, "task_loss": 0.5176271200180054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3891712427139282, "epoch": 6.74, "learning_rate": 1.627641589180051e-05, "loss": 0.5269, "step": 7979, "task_loss": 1.4875051975250244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28974100947380066, "epoch": 6.75, "learning_rate": 1.6272189349112425e-05, "loss": 0.4091, "step": 7980, "task_loss": 0.6200781464576721 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7219356298446655, "epoch": 6.75, "learning_rate": 1.6267962806424345e-05, "loss": 0.4664, "step": 7981, "task_loss": 0.5566542744636536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4468679130077362, "epoch": 6.75, "learning_rate": 1.6263736263736265e-05, "loss": 0.4221, "step": 7982, "task_loss": 0.8056275844573975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36290279030799866, "epoch": 6.75, "learning_rate": 1.6259509721048184e-05, "loss": 0.6668, "step": 7983, "task_loss": 1.2422292232513428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.317783385515213, "epoch": 6.75, "learning_rate": 1.6255283178360104e-05, "loss": 0.4635, "step": 7984, "task_loss": 0.051301438361406326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39396974444389343, "epoch": 6.75, "learning_rate": 1.625105663567202e-05, "loss": 0.4588, "step": 7985, "task_loss": 0.5450160503387451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32647043466567993, "epoch": 6.75, "learning_rate": 1.624683009298394e-05, "loss": 0.3045, "step": 7986, "task_loss": 0.5163854956626892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3099084496498108, "epoch": 6.75, "learning_rate": 1.624260355029586e-05, "loss": 0.4896, "step": 7987, "task_loss": 0.47284674644470215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5949761867523193, "epoch": 6.75, "learning_rate": 1.6238377007607776e-05, "loss": 0.5508, "step": 7988, "task_loss": 0.40143948793411255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35647398233413696, "epoch": 6.75, "learning_rate": 1.6234150464919696e-05, "loss": 0.4673, "step": 7989, "task_loss": 0.5432431101799011 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5430876016616821, "epoch": 6.75, "learning_rate": 1.6229923922231616e-05, "loss": 0.4804, "step": 7990, "task_loss": 0.8444298505783081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.650983452796936, "epoch": 6.75, "learning_rate": 1.6225697379543532e-05, "loss": 0.4648, "step": 7991, "task_loss": 0.6564168930053711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40401947498321533, "epoch": 6.76, "learning_rate": 1.6221470836855455e-05, "loss": 0.3705, "step": 7992, "task_loss": 0.5064387917518616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3301240801811218, "epoch": 6.76, "learning_rate": 1.621724429416737e-05, "loss": 0.3703, "step": 7993, "task_loss": 0.191017284989357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16784796118736267, "epoch": 6.76, "learning_rate": 1.621301775147929e-05, "loss": 0.4085, "step": 7994, "task_loss": 0.013024200685322285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5615407824516296, "epoch": 6.76, "learning_rate": 1.620879120879121e-05, "loss": 0.4057, "step": 7995, "task_loss": 1.4824144840240479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6682569980621338, "epoch": 6.76, "learning_rate": 1.6204564666103127e-05, "loss": 0.4554, "step": 7996, "task_loss": 1.2234739065170288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5098133087158203, "epoch": 6.76, "learning_rate": 1.6200338123415047e-05, "loss": 0.4248, "step": 7997, "task_loss": 1.1086411476135254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49236127734184265, "epoch": 6.76, "learning_rate": 1.6196111580726967e-05, "loss": 0.3804, "step": 7998, "task_loss": 1.1716437339782715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4926813840866089, "epoch": 6.76, "learning_rate": 1.6191885038038883e-05, "loss": 0.4639, "step": 7999, "task_loss": 0.4787040650844574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2839115858078003, "epoch": 6.76, "learning_rate": 1.6187658495350806e-05, "loss": 0.4997, "step": 8000, "task_loss": 0.6261990070343018 }, { "epoch": 6.76, "eval_accuracy": 0.9125544554455446, "eval_loss": 0.27955999970436096, "eval_runtime": 228.9141, "eval_samples_per_second": 110.303, "eval_steps_per_second": 0.865, "step": 8000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16095927357673645, "epoch": 6.76, "learning_rate": 1.6183431952662723e-05, "loss": 0.4743, "step": 8001, "task_loss": 0.2690582871437073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5012394189834595, "epoch": 6.76, "learning_rate": 1.617920540997464e-05, "loss": 0.3123, "step": 8002, "task_loss": 0.49093863368034363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6563464999198914, "epoch": 6.76, "learning_rate": 1.6174978867286562e-05, "loss": 0.5459, "step": 8003, "task_loss": 1.0457136631011963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3518804907798767, "epoch": 6.77, "learning_rate": 1.617075232459848e-05, "loss": 0.3818, "step": 8004, "task_loss": 0.3805527985095978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6097347140312195, "epoch": 6.77, "learning_rate": 1.6166525781910398e-05, "loss": 0.4862, "step": 8005, "task_loss": 1.503118634223938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38980960845947266, "epoch": 6.77, "learning_rate": 1.6162299239222318e-05, "loss": 0.4604, "step": 8006, "task_loss": 0.9634042382240295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4452405273914337, "epoch": 6.77, "learning_rate": 1.6158072696534234e-05, "loss": 0.4147, "step": 8007, "task_loss": 0.622454822063446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8142214417457581, "epoch": 6.77, "learning_rate": 1.6153846153846154e-05, "loss": 0.4728, "step": 8008, "task_loss": 0.5377688407897949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40532761812210083, "epoch": 6.77, "learning_rate": 1.6149619611158074e-05, "loss": 0.4213, "step": 8009, "task_loss": 0.4275454580783844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3079216778278351, "epoch": 6.77, "learning_rate": 1.614539306846999e-05, "loss": 0.4219, "step": 8010, "task_loss": 0.34865784645080566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27649450302124023, "epoch": 6.77, "learning_rate": 1.6141166525781913e-05, "loss": 0.3931, "step": 8011, "task_loss": 0.581843912601471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36940401792526245, "epoch": 6.77, "learning_rate": 1.613693998309383e-05, "loss": 0.4218, "step": 8012, "task_loss": 1.0136339664459229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29815709590911865, "epoch": 6.77, "learning_rate": 1.613271344040575e-05, "loss": 0.5031, "step": 8013, "task_loss": 0.7317314743995667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4165448546409607, "epoch": 6.77, "learning_rate": 1.612848689771767e-05, "loss": 0.3401, "step": 8014, "task_loss": 0.8661158084869385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2309708148241043, "epoch": 6.77, "learning_rate": 1.6124260355029585e-05, "loss": 0.4438, "step": 8015, "task_loss": 0.30182480812072754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7776057720184326, "epoch": 6.78, "learning_rate": 1.6120033812341505e-05, "loss": 0.4979, "step": 8016, "task_loss": 0.6237890720367432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34678593277931213, "epoch": 6.78, "learning_rate": 1.6115807269653425e-05, "loss": 0.4933, "step": 8017, "task_loss": 0.5657974481582642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44178688526153564, "epoch": 6.78, "learning_rate": 1.611158072696534e-05, "loss": 0.5281, "step": 8018, "task_loss": 1.1523361206054688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5925604701042175, "epoch": 6.78, "learning_rate": 1.610735418427726e-05, "loss": 0.4988, "step": 8019, "task_loss": 0.328058123588562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4390653967857361, "epoch": 6.78, "learning_rate": 1.610312764158918e-05, "loss": 0.4739, "step": 8020, "task_loss": 0.5127101540565491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5726560354232788, "epoch": 6.78, "learning_rate": 1.60989010989011e-05, "loss": 0.5856, "step": 8021, "task_loss": 0.8196535110473633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5633814930915833, "epoch": 6.78, "learning_rate": 1.609467455621302e-05, "loss": 0.4737, "step": 8022, "task_loss": 0.36884334683418274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3535408675670624, "epoch": 6.78, "learning_rate": 1.6090448013524937e-05, "loss": 0.3469, "step": 8023, "task_loss": 0.10423684865236282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16628949344158173, "epoch": 6.78, "learning_rate": 1.6086221470836856e-05, "loss": 0.3359, "step": 8024, "task_loss": 0.5117090344429016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29161396622657776, "epoch": 6.78, "learning_rate": 1.6081994928148776e-05, "loss": 0.3325, "step": 8025, "task_loss": 0.6183074116706848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.390272319316864, "epoch": 6.78, "learning_rate": 1.6077768385460696e-05, "loss": 0.3471, "step": 8026, "task_loss": 0.5201712846755981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5231207609176636, "epoch": 6.78, "learning_rate": 1.6073541842772612e-05, "loss": 0.4771, "step": 8027, "task_loss": 0.2206430435180664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35198789834976196, "epoch": 6.79, "learning_rate": 1.6069315300084532e-05, "loss": 0.4537, "step": 8028, "task_loss": 0.5128129720687866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33014366030693054, "epoch": 6.79, "learning_rate": 1.606508875739645e-05, "loss": 0.3205, "step": 8029, "task_loss": 0.8191583752632141 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3232291340827942, "epoch": 6.79, "learning_rate": 1.6060862214708368e-05, "loss": 0.3346, "step": 8030, "task_loss": 0.35558173060417175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3934454917907715, "epoch": 6.79, "learning_rate": 1.6056635672020288e-05, "loss": 0.4103, "step": 8031, "task_loss": 0.4295128881931305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21254229545593262, "epoch": 6.79, "learning_rate": 1.6052409129332207e-05, "loss": 0.3439, "step": 8032, "task_loss": 0.6510332226753235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23942747712135315, "epoch": 6.79, "learning_rate": 1.6048182586644127e-05, "loss": 0.3563, "step": 8033, "task_loss": 0.34563490748405457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19090262055397034, "epoch": 6.79, "learning_rate": 1.6043956043956047e-05, "loss": 0.4454, "step": 8034, "task_loss": 0.09281444549560547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2765709161758423, "epoch": 6.79, "learning_rate": 1.6039729501267963e-05, "loss": 0.3416, "step": 8035, "task_loss": 0.5890077948570251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4097260534763336, "epoch": 6.79, "learning_rate": 1.6035502958579883e-05, "loss": 0.4477, "step": 8036, "task_loss": 0.6353673934936523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3127579391002655, "epoch": 6.79, "learning_rate": 1.6031276415891803e-05, "loss": 0.469, "step": 8037, "task_loss": 0.47293365001678467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5280681848526001, "epoch": 6.79, "learning_rate": 1.602704987320372e-05, "loss": 0.5216, "step": 8038, "task_loss": 0.42518875002861023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34957584738731384, "epoch": 6.79, "learning_rate": 1.602282333051564e-05, "loss": 0.4564, "step": 8039, "task_loss": 0.7410932183265686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4208882451057434, "epoch": 6.8, "learning_rate": 1.601859678782756e-05, "loss": 0.4007, "step": 8040, "task_loss": 0.530224621295929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43443965911865234, "epoch": 6.8, "learning_rate": 1.6014370245139475e-05, "loss": 0.4637, "step": 8041, "task_loss": 0.21614809334278107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3379107713699341, "epoch": 6.8, "learning_rate": 1.6010143702451398e-05, "loss": 0.4029, "step": 8042, "task_loss": 0.35711783170700073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19834917783737183, "epoch": 6.8, "learning_rate": 1.6005917159763314e-05, "loss": 0.3515, "step": 8043, "task_loss": 0.10279317200183868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23508857190608978, "epoch": 6.8, "learning_rate": 1.600169061707523e-05, "loss": 0.3821, "step": 8044, "task_loss": 0.4397507607936859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3264158368110657, "epoch": 6.8, "learning_rate": 1.5997464074387154e-05, "loss": 0.3823, "step": 8045, "task_loss": 0.7231268286705017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48114579916000366, "epoch": 6.8, "learning_rate": 1.599323753169907e-05, "loss": 0.4898, "step": 8046, "task_loss": 0.8730953335762024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.412562757730484, "epoch": 6.8, "learning_rate": 1.598901098901099e-05, "loss": 0.3786, "step": 8047, "task_loss": 0.20533229410648346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37530413269996643, "epoch": 6.8, "learning_rate": 1.598478444632291e-05, "loss": 0.4339, "step": 8048, "task_loss": 0.11649879068136215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3089049458503723, "epoch": 6.8, "learning_rate": 1.5980557903634826e-05, "loss": 0.3362, "step": 8049, "task_loss": 0.043812721967697144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3435078263282776, "epoch": 6.8, "learning_rate": 1.5976331360946746e-05, "loss": 0.4923, "step": 8050, "task_loss": 0.7447821497917175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3593711853027344, "epoch": 6.81, "learning_rate": 1.5972104818258665e-05, "loss": 0.3428, "step": 8051, "task_loss": 0.29679423570632935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6199342012405396, "epoch": 6.81, "learning_rate": 1.5967878275570582e-05, "loss": 0.5188, "step": 8052, "task_loss": 1.025733232498169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4931357800960541, "epoch": 6.81, "learning_rate": 1.5963651732882505e-05, "loss": 0.3856, "step": 8053, "task_loss": 0.11635943502187729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.296481192111969, "epoch": 6.81, "learning_rate": 1.595942519019442e-05, "loss": 0.3528, "step": 8054, "task_loss": 0.45406460762023926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3673345148563385, "epoch": 6.81, "learning_rate": 1.5955198647506338e-05, "loss": 0.3677, "step": 8055, "task_loss": 0.7397918701171875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3369954824447632, "epoch": 6.81, "learning_rate": 1.595097210481826e-05, "loss": 0.3799, "step": 8056, "task_loss": 0.7255717515945435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4376756548881531, "epoch": 6.81, "learning_rate": 1.5946745562130177e-05, "loss": 0.56, "step": 8057, "task_loss": 1.481479287147522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3143744468688965, "epoch": 6.81, "learning_rate": 1.5942519019442097e-05, "loss": 0.3216, "step": 8058, "task_loss": 0.8532222509384155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3906935155391693, "epoch": 6.81, "learning_rate": 1.5938292476754017e-05, "loss": 0.513, "step": 8059, "task_loss": 0.2701754868030548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5425615310668945, "epoch": 6.81, "learning_rate": 1.5934065934065933e-05, "loss": 0.5498, "step": 8060, "task_loss": 0.2165064811706543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32677197456359863, "epoch": 6.81, "learning_rate": 1.5929839391377853e-05, "loss": 0.3415, "step": 8061, "task_loss": 0.22302232682704926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4614410996437073, "epoch": 6.81, "learning_rate": 1.5925612848689772e-05, "loss": 0.4994, "step": 8062, "task_loss": 0.48615002632141113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35435038805007935, "epoch": 6.82, "learning_rate": 1.5921386306001692e-05, "loss": 0.4701, "step": 8063, "task_loss": 0.747526228427887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3174985349178314, "epoch": 6.82, "learning_rate": 1.5917159763313612e-05, "loss": 0.3389, "step": 8064, "task_loss": 0.32501181960105896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28691577911376953, "epoch": 6.82, "learning_rate": 1.5912933220625528e-05, "loss": 0.3381, "step": 8065, "task_loss": 0.2907180190086365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42154568433761597, "epoch": 6.82, "learning_rate": 1.5908706677937448e-05, "loss": 0.361, "step": 8066, "task_loss": 0.3193168342113495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3747527599334717, "epoch": 6.82, "learning_rate": 1.5904480135249368e-05, "loss": 0.4731, "step": 8067, "task_loss": 1.0634963512420654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4335249662399292, "epoch": 6.82, "learning_rate": 1.5900253592561284e-05, "loss": 0.502, "step": 8068, "task_loss": 1.1207228899002075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3604487180709839, "epoch": 6.82, "learning_rate": 1.5896027049873204e-05, "loss": 0.4875, "step": 8069, "task_loss": 0.6643565893173218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6618340015411377, "epoch": 6.82, "learning_rate": 1.5891800507185124e-05, "loss": 0.5111, "step": 8070, "task_loss": 0.42158302664756775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8793841600418091, "epoch": 6.82, "learning_rate": 1.5887573964497043e-05, "loss": 0.5906, "step": 8071, "task_loss": 1.0742409229278564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4195050597190857, "epoch": 6.82, "learning_rate": 1.588334742180896e-05, "loss": 0.3787, "step": 8072, "task_loss": 0.4573048949241638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5253006219863892, "epoch": 6.82, "learning_rate": 1.587912087912088e-05, "loss": 0.3946, "step": 8073, "task_loss": 1.1194708347320557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5496973395347595, "epoch": 6.82, "learning_rate": 1.58748943364328e-05, "loss": 0.462, "step": 8074, "task_loss": 1.2671682834625244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17610062658786774, "epoch": 6.83, "learning_rate": 1.587066779374472e-05, "loss": 0.3822, "step": 8075, "task_loss": 0.14079108834266663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31717604398727417, "epoch": 6.83, "learning_rate": 1.5866441251056635e-05, "loss": 0.331, "step": 8076, "task_loss": 0.35425427556037903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3246527314186096, "epoch": 6.83, "learning_rate": 1.5862214708368555e-05, "loss": 0.4595, "step": 8077, "task_loss": 0.34041333198547363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21272632479667664, "epoch": 6.83, "learning_rate": 1.5857988165680475e-05, "loss": 0.3478, "step": 8078, "task_loss": 0.20563143491744995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48189061880111694, "epoch": 6.83, "learning_rate": 1.5853761622992394e-05, "loss": 0.3814, "step": 8079, "task_loss": 0.7871041297912598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40245136618614197, "epoch": 6.83, "learning_rate": 1.584953508030431e-05, "loss": 0.3914, "step": 8080, "task_loss": 1.0009925365447998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24773475527763367, "epoch": 6.83, "learning_rate": 1.584530853761623e-05, "loss": 0.4022, "step": 8081, "task_loss": 0.5443968772888184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2241898626089096, "epoch": 6.83, "learning_rate": 1.584108199492815e-05, "loss": 0.4694, "step": 8082, "task_loss": 0.055436864495277405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39965224266052246, "epoch": 6.83, "learning_rate": 1.5836855452240067e-05, "loss": 0.3547, "step": 8083, "task_loss": 1.2731119394302368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3657926619052887, "epoch": 6.83, "learning_rate": 1.5832628909551986e-05, "loss": 0.4419, "step": 8084, "task_loss": 1.1267801523208618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5794357061386108, "epoch": 6.83, "learning_rate": 1.5828402366863906e-05, "loss": 0.5081, "step": 8085, "task_loss": 0.4853847622871399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42030853033065796, "epoch": 6.83, "learning_rate": 1.5824175824175826e-05, "loss": 0.4069, "step": 8086, "task_loss": 0.2855437397956848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.527397871017456, "epoch": 6.84, "learning_rate": 1.5819949281487746e-05, "loss": 0.5237, "step": 8087, "task_loss": 0.38615819811820984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.194346085190773, "epoch": 6.84, "learning_rate": 1.5815722738799662e-05, "loss": 0.3106, "step": 8088, "task_loss": 0.2588060796260834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4728699326515198, "epoch": 6.84, "learning_rate": 1.581149619611158e-05, "loss": 0.3948, "step": 8089, "task_loss": 0.4079889953136444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20930883288383484, "epoch": 6.84, "learning_rate": 1.58072696534235e-05, "loss": 0.3449, "step": 8090, "task_loss": 0.3875212073326111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3761861324310303, "epoch": 6.84, "learning_rate": 1.5803043110735418e-05, "loss": 0.3651, "step": 8091, "task_loss": 0.6081957817077637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34621936082839966, "epoch": 6.84, "learning_rate": 1.579881656804734e-05, "loss": 0.4026, "step": 8092, "task_loss": 0.9961043000221252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5572715997695923, "epoch": 6.84, "learning_rate": 1.5794590025359257e-05, "loss": 0.3983, "step": 8093, "task_loss": 1.1120784282684326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14668989181518555, "epoch": 6.84, "learning_rate": 1.5790363482671174e-05, "loss": 0.386, "step": 8094, "task_loss": 0.07613242417573929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33486998081207275, "epoch": 6.84, "learning_rate": 1.5786136939983097e-05, "loss": 0.3491, "step": 8095, "task_loss": 0.37807804346084595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5577993988990784, "epoch": 6.84, "learning_rate": 1.5781910397295013e-05, "loss": 0.3665, "step": 8096, "task_loss": 0.5054947733879089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37743669748306274, "epoch": 6.84, "learning_rate": 1.577768385460693e-05, "loss": 0.4887, "step": 8097, "task_loss": 0.568747341632843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3052997589111328, "epoch": 6.84, "learning_rate": 1.5773457311918853e-05, "loss": 0.3749, "step": 8098, "task_loss": 0.5515596270561218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23653361201286316, "epoch": 6.85, "learning_rate": 1.576923076923077e-05, "loss": 0.3166, "step": 8099, "task_loss": 0.20517557859420776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43176761269569397, "epoch": 6.85, "learning_rate": 1.576500422654269e-05, "loss": 0.4632, "step": 8100, "task_loss": 0.2481747716665268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31201624870300293, "epoch": 6.85, "learning_rate": 1.576077768385461e-05, "loss": 0.5004, "step": 8101, "task_loss": 0.9064463973045349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3761563003063202, "epoch": 6.85, "learning_rate": 1.5756551141166525e-05, "loss": 0.4795, "step": 8102, "task_loss": 0.09617482125759125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33675050735473633, "epoch": 6.85, "learning_rate": 1.5752324598478448e-05, "loss": 0.5245, "step": 8103, "task_loss": 0.49845263361930847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2771129012107849, "epoch": 6.85, "learning_rate": 1.5748098055790364e-05, "loss": 0.3804, "step": 8104, "task_loss": 0.3021808862686157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29359641671180725, "epoch": 6.85, "learning_rate": 1.574387151310228e-05, "loss": 0.3325, "step": 8105, "task_loss": 0.5776968002319336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3664187788963318, "epoch": 6.85, "learning_rate": 1.5739644970414204e-05, "loss": 0.3538, "step": 8106, "task_loss": 0.8243472576141357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3749851584434509, "epoch": 6.85, "learning_rate": 1.573541842772612e-05, "loss": 0.3746, "step": 8107, "task_loss": 2.871325731277466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30693602561950684, "epoch": 6.85, "learning_rate": 1.573119188503804e-05, "loss": 0.3447, "step": 8108, "task_loss": 0.6457070708274841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5174764394760132, "epoch": 6.85, "learning_rate": 1.572696534234996e-05, "loss": 0.479, "step": 8109, "task_loss": 0.482393741607666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30791014432907104, "epoch": 6.85, "learning_rate": 1.5722738799661876e-05, "loss": 0.4761, "step": 8110, "task_loss": 0.7153225541114807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4221128225326538, "epoch": 6.86, "learning_rate": 1.5718512256973796e-05, "loss": 0.4195, "step": 8111, "task_loss": 0.7556651830673218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17395806312561035, "epoch": 6.86, "learning_rate": 1.5714285714285715e-05, "loss": 0.3017, "step": 8112, "task_loss": 0.25805196166038513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33352524042129517, "epoch": 6.86, "learning_rate": 1.571005917159763e-05, "loss": 0.4653, "step": 8113, "task_loss": 0.5006245374679565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2153080403804779, "epoch": 6.86, "learning_rate": 1.570583262890955e-05, "loss": 0.4075, "step": 8114, "task_loss": 0.722095251083374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5717830657958984, "epoch": 6.86, "learning_rate": 1.570160608622147e-05, "loss": 0.4074, "step": 8115, "task_loss": 0.9062063694000244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5659950971603394, "epoch": 6.86, "learning_rate": 1.569737954353339e-05, "loss": 0.467, "step": 8116, "task_loss": 0.21606139838695526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37353789806365967, "epoch": 6.86, "learning_rate": 1.569315300084531e-05, "loss": 0.4096, "step": 8117, "task_loss": 0.9258955717086792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2989078462123871, "epoch": 6.86, "learning_rate": 1.5688926458157227e-05, "loss": 0.2604, "step": 8118, "task_loss": 1.034909725189209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4253457188606262, "epoch": 6.86, "learning_rate": 1.5684699915469147e-05, "loss": 0.4764, "step": 8119, "task_loss": 1.0334888696670532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23852607607841492, "epoch": 6.86, "learning_rate": 1.5680473372781066e-05, "loss": 0.5042, "step": 8120, "task_loss": 0.6262539029121399 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47986072301864624, "epoch": 6.86, "learning_rate": 1.5676246830092986e-05, "loss": 0.4565, "step": 8121, "task_loss": 0.4495536684989929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32538288831710815, "epoch": 6.87, "learning_rate": 1.5672020287404903e-05, "loss": 0.4453, "step": 8122, "task_loss": 0.6178637742996216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43225064873695374, "epoch": 6.87, "learning_rate": 1.5667793744716822e-05, "loss": 0.3901, "step": 8123, "task_loss": 0.3126216530799866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6894912719726562, "epoch": 6.87, "learning_rate": 1.5663567202028742e-05, "loss": 0.4547, "step": 8124, "task_loss": 0.9569483399391174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42321085929870605, "epoch": 6.87, "learning_rate": 1.565934065934066e-05, "loss": 0.4053, "step": 8125, "task_loss": 0.7773823738098145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2936991751194, "epoch": 6.87, "learning_rate": 1.5655114116652578e-05, "loss": 0.4166, "step": 8126, "task_loss": 0.8166089057922363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4077661335468292, "epoch": 6.87, "learning_rate": 1.5650887573964498e-05, "loss": 0.5046, "step": 8127, "task_loss": 0.6761220693588257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31363582611083984, "epoch": 6.87, "learning_rate": 1.5646661031276418e-05, "loss": 0.4506, "step": 8128, "task_loss": 0.21208417415618896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5668113231658936, "epoch": 6.87, "learning_rate": 1.5642434488588337e-05, "loss": 0.4882, "step": 8129, "task_loss": 0.5586158037185669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4881879687309265, "epoch": 6.87, "learning_rate": 1.5638207945900254e-05, "loss": 0.4445, "step": 8130, "task_loss": 0.04391130805015564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2599678337574005, "epoch": 6.87, "learning_rate": 1.5633981403212173e-05, "loss": 0.3197, "step": 8131, "task_loss": 0.2690867483615875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4973011016845703, "epoch": 6.87, "learning_rate": 1.5629754860524093e-05, "loss": 0.5662, "step": 8132, "task_loss": 0.9081259369850159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5619357228279114, "epoch": 6.87, "learning_rate": 1.562552831783601e-05, "loss": 0.4176, "step": 8133, "task_loss": 0.23953431844711304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40733802318573, "epoch": 6.88, "learning_rate": 1.562130177514793e-05, "loss": 0.3983, "step": 8134, "task_loss": 0.8377662897109985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4572140574455261, "epoch": 6.88, "learning_rate": 1.561707523245985e-05, "loss": 0.5337, "step": 8135, "task_loss": 0.643834114074707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3703729510307312, "epoch": 6.88, "learning_rate": 1.5612848689771765e-05, "loss": 0.393, "step": 8136, "task_loss": 0.4513920247554779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6426957845687866, "epoch": 6.88, "learning_rate": 1.560862214708369e-05, "loss": 0.5496, "step": 8137, "task_loss": 0.5394741296768188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.55413818359375, "epoch": 6.88, "learning_rate": 1.5604395604395605e-05, "loss": 0.4153, "step": 8138, "task_loss": 0.5221492052078247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24530254304409027, "epoch": 6.88, "learning_rate": 1.5600169061707525e-05, "loss": 0.5809, "step": 8139, "task_loss": 0.34876152873039246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4040549695491791, "epoch": 6.88, "learning_rate": 1.5595942519019444e-05, "loss": 0.3158, "step": 8140, "task_loss": 0.4428268373012543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27233025431632996, "epoch": 6.88, "learning_rate": 1.559171597633136e-05, "loss": 0.3513, "step": 8141, "task_loss": 0.12480652332305908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23360180854797363, "epoch": 6.88, "learning_rate": 1.558748943364328e-05, "loss": 0.4417, "step": 8142, "task_loss": 0.5513081550598145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5700194835662842, "epoch": 6.88, "learning_rate": 1.55832628909552e-05, "loss": 0.5125, "step": 8143, "task_loss": 0.5107743740081787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30233046412467957, "epoch": 6.88, "learning_rate": 1.5579036348267116e-05, "loss": 0.3763, "step": 8144, "task_loss": 0.6201931238174438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4131520390510559, "epoch": 6.88, "learning_rate": 1.557480980557904e-05, "loss": 0.3749, "step": 8145, "task_loss": 0.5864682793617249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4278145432472229, "epoch": 6.89, "learning_rate": 1.5570583262890956e-05, "loss": 0.4052, "step": 8146, "task_loss": 0.6012652516365051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5053829550743103, "epoch": 6.89, "learning_rate": 1.5566356720202872e-05, "loss": 0.3906, "step": 8147, "task_loss": 0.4206555187702179 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2978106737136841, "epoch": 6.89, "learning_rate": 1.5562130177514795e-05, "loss": 0.4727, "step": 8148, "task_loss": 0.41058239340782166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3008463382720947, "epoch": 6.89, "learning_rate": 1.5557903634826712e-05, "loss": 0.445, "step": 8149, "task_loss": 0.2962489724159241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6150112152099609, "epoch": 6.89, "learning_rate": 1.555367709213863e-05, "loss": 0.5465, "step": 8150, "task_loss": 0.7760774493217468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46302351355552673, "epoch": 6.89, "learning_rate": 1.554945054945055e-05, "loss": 0.4248, "step": 8151, "task_loss": 1.3289170265197754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28753024339675903, "epoch": 6.89, "learning_rate": 1.5545224006762468e-05, "loss": 0.4847, "step": 8152, "task_loss": 0.47131258249282837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4746450185775757, "epoch": 6.89, "learning_rate": 1.5540997464074387e-05, "loss": 0.4887, "step": 8153, "task_loss": 0.19067645072937012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26616308093070984, "epoch": 6.89, "learning_rate": 1.5536770921386307e-05, "loss": 0.3738, "step": 8154, "task_loss": 0.5526065826416016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3078230321407318, "epoch": 6.89, "learning_rate": 1.5532544378698223e-05, "loss": 0.3285, "step": 8155, "task_loss": 0.2108541578054428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5634310841560364, "epoch": 6.89, "learning_rate": 1.5528317836010147e-05, "loss": 0.4527, "step": 8156, "task_loss": 1.3418488502502441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3835914731025696, "epoch": 6.89, "learning_rate": 1.5524091293322063e-05, "loss": 0.524, "step": 8157, "task_loss": 1.2033747434616089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4126585125923157, "epoch": 6.9, "learning_rate": 1.5519864750633983e-05, "loss": 0.3997, "step": 8158, "task_loss": 0.8263439536094666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3593077063560486, "epoch": 6.9, "learning_rate": 1.5515638207945902e-05, "loss": 0.4915, "step": 8159, "task_loss": 0.3045428991317749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21504604816436768, "epoch": 6.9, "learning_rate": 1.551141166525782e-05, "loss": 0.3788, "step": 8160, "task_loss": 0.271843284368515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3327184021472931, "epoch": 6.9, "learning_rate": 1.550718512256974e-05, "loss": 0.4636, "step": 8161, "task_loss": 0.08891170471906662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41336962580680847, "epoch": 6.9, "learning_rate": 1.5502958579881658e-05, "loss": 0.3772, "step": 8162, "task_loss": 0.47042813897132874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22254504263401031, "epoch": 6.9, "learning_rate": 1.5498732037193575e-05, "loss": 0.4226, "step": 8163, "task_loss": 1.122079610824585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3511051833629608, "epoch": 6.9, "learning_rate": 1.5494505494505494e-05, "loss": 0.4026, "step": 8164, "task_loss": 0.8661563396453857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31127703189849854, "epoch": 6.9, "learning_rate": 1.5490278951817414e-05, "loss": 0.256, "step": 8165, "task_loss": 0.08318139612674713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32370615005493164, "epoch": 6.9, "learning_rate": 1.5486052409129334e-05, "loss": 0.4869, "step": 8166, "task_loss": 1.0473151206970215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3904055058956146, "epoch": 6.9, "learning_rate": 1.5481825866441254e-05, "loss": 0.4573, "step": 8167, "task_loss": 0.535521388053894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49605295062065125, "epoch": 6.9, "learning_rate": 1.547759932375317e-05, "loss": 0.4706, "step": 8168, "task_loss": 0.9088897705078125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3962818682193756, "epoch": 6.9, "learning_rate": 1.547337278106509e-05, "loss": 0.416, "step": 8169, "task_loss": 0.7820472121238708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31070685386657715, "epoch": 6.91, "learning_rate": 1.546914623837701e-05, "loss": 0.3688, "step": 8170, "task_loss": 0.4734131097793579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5645719170570374, "epoch": 6.91, "learning_rate": 1.5464919695688926e-05, "loss": 0.5806, "step": 8171, "task_loss": 0.4287545084953308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4783346951007843, "epoch": 6.91, "learning_rate": 1.5460693153000845e-05, "loss": 0.4706, "step": 8172, "task_loss": 0.47039905190467834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36929216980934143, "epoch": 6.91, "learning_rate": 1.5456466610312765e-05, "loss": 0.3397, "step": 8173, "task_loss": 1.265995979309082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6413061618804932, "epoch": 6.91, "learning_rate": 1.5452240067624685e-05, "loss": 0.5159, "step": 8174, "task_loss": 1.1344407796859741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9467734098434448, "epoch": 6.91, "learning_rate": 1.54480135249366e-05, "loss": 0.4928, "step": 8175, "task_loss": 1.5185370445251465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5044880509376526, "epoch": 6.91, "learning_rate": 1.544378698224852e-05, "loss": 0.41, "step": 8176, "task_loss": 0.31301987171173096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5570403337478638, "epoch": 6.91, "learning_rate": 1.543956043956044e-05, "loss": 0.5241, "step": 8177, "task_loss": 1.453322410583496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19055254757404327, "epoch": 6.91, "learning_rate": 1.5435333896872357e-05, "loss": 0.3988, "step": 8178, "task_loss": 0.3592352867126465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18470153212547302, "epoch": 6.91, "learning_rate": 1.543110735418428e-05, "loss": 0.4151, "step": 8179, "task_loss": 0.13249756395816803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3627029359340668, "epoch": 6.91, "learning_rate": 1.5426880811496197e-05, "loss": 0.3906, "step": 8180, "task_loss": 0.699699878692627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3549533486366272, "epoch": 6.91, "learning_rate": 1.5422654268808116e-05, "loss": 0.4659, "step": 8181, "task_loss": 0.49538061022758484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46586260199546814, "epoch": 6.92, "learning_rate": 1.5418427726120036e-05, "loss": 0.3822, "step": 8182, "task_loss": 0.8771148324012756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6045873165130615, "epoch": 6.92, "learning_rate": 1.5414201183431952e-05, "loss": 0.4479, "step": 8183, "task_loss": 1.226340651512146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4289664626121521, "epoch": 6.92, "learning_rate": 1.5409974640743872e-05, "loss": 0.443, "step": 8184, "task_loss": 1.225841760635376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5048332810401917, "epoch": 6.92, "learning_rate": 1.5405748098055792e-05, "loss": 0.3656, "step": 8185, "task_loss": 0.11877667158842087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.431664377450943, "epoch": 6.92, "learning_rate": 1.5401521555367708e-05, "loss": 0.4556, "step": 8186, "task_loss": 1.0210868120193481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3943973183631897, "epoch": 6.92, "learning_rate": 1.539729501267963e-05, "loss": 0.4322, "step": 8187, "task_loss": 0.6154343485832214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4577901363372803, "epoch": 6.92, "learning_rate": 1.5393068469991548e-05, "loss": 0.3572, "step": 8188, "task_loss": 0.6505711674690247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3881218135356903, "epoch": 6.92, "learning_rate": 1.5388841927303464e-05, "loss": 0.4488, "step": 8189, "task_loss": 0.6387543082237244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3238261044025421, "epoch": 6.92, "learning_rate": 1.5384615384615387e-05, "loss": 0.321, "step": 8190, "task_loss": 0.42165616154670715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4473949670791626, "epoch": 6.92, "learning_rate": 1.5380388841927304e-05, "loss": 0.3374, "step": 8191, "task_loss": 0.3603472411632538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6827565431594849, "epoch": 6.92, "learning_rate": 1.5376162299239223e-05, "loss": 0.4922, "step": 8192, "task_loss": 0.6123996376991272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30786436796188354, "epoch": 6.93, "learning_rate": 1.5371935756551143e-05, "loss": 0.3905, "step": 8193, "task_loss": 0.6191640496253967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3981778621673584, "epoch": 6.93, "learning_rate": 1.536770921386306e-05, "loss": 0.4501, "step": 8194, "task_loss": 0.2218705713748932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5682381391525269, "epoch": 6.93, "learning_rate": 1.536348267117498e-05, "loss": 0.4223, "step": 8195, "task_loss": 0.29919493198394775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4306694269180298, "epoch": 6.93, "learning_rate": 1.53592561284869e-05, "loss": 0.4843, "step": 8196, "task_loss": 0.7966792583465576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3472772240638733, "epoch": 6.93, "learning_rate": 1.5355029585798815e-05, "loss": 0.3953, "step": 8197, "task_loss": 0.6032739877700806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31867820024490356, "epoch": 6.93, "learning_rate": 1.535080304311074e-05, "loss": 0.4119, "step": 8198, "task_loss": 0.3969121277332306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21640893816947937, "epoch": 6.93, "learning_rate": 1.5346576500422655e-05, "loss": 0.3359, "step": 8199, "task_loss": 1.3615880012512207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2863558530807495, "epoch": 6.93, "learning_rate": 1.534234995773457e-05, "loss": 0.3792, "step": 8200, "task_loss": 0.1618276834487915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2689869701862335, "epoch": 6.93, "learning_rate": 1.5338123415046494e-05, "loss": 0.5147, "step": 8201, "task_loss": 0.7231017351150513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23083674907684326, "epoch": 6.93, "learning_rate": 1.533389687235841e-05, "loss": 0.3249, "step": 8202, "task_loss": 0.9172165393829346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3891570568084717, "epoch": 6.93, "learning_rate": 1.532967032967033e-05, "loss": 0.3953, "step": 8203, "task_loss": 0.7521423101425171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22845682501792908, "epoch": 6.93, "learning_rate": 1.532544378698225e-05, "loss": 0.3412, "step": 8204, "task_loss": 0.1563791185617447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21759039163589478, "epoch": 6.94, "learning_rate": 1.5321217244294166e-05, "loss": 0.2823, "step": 8205, "task_loss": 0.5944491624832153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15138694643974304, "epoch": 6.94, "learning_rate": 1.5316990701606086e-05, "loss": 0.3447, "step": 8206, "task_loss": 0.8165296912193298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26458975672721863, "epoch": 6.94, "learning_rate": 1.5312764158918006e-05, "loss": 0.3479, "step": 8207, "task_loss": 0.2845596373081207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1431186944246292, "epoch": 6.94, "learning_rate": 1.5308537616229926e-05, "loss": 0.3306, "step": 8208, "task_loss": 0.009467852301895618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3068787753582001, "epoch": 6.94, "learning_rate": 1.5304311073541845e-05, "loss": 0.4213, "step": 8209, "task_loss": 0.8935056924819946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5556225180625916, "epoch": 6.94, "learning_rate": 1.530008453085376e-05, "loss": 0.4376, "step": 8210, "task_loss": 0.3257346749305725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45078691840171814, "epoch": 6.94, "learning_rate": 1.529585798816568e-05, "loss": 0.4185, "step": 8211, "task_loss": 0.7217759490013123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1918867826461792, "epoch": 6.94, "learning_rate": 1.52916314454776e-05, "loss": 0.4319, "step": 8212, "task_loss": 0.37170982360839844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33981025218963623, "epoch": 6.94, "learning_rate": 1.5287404902789517e-05, "loss": 0.4704, "step": 8213, "task_loss": 0.2498490959405899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3119122385978699, "epoch": 6.94, "learning_rate": 1.5283178360101437e-05, "loss": 0.3156, "step": 8214, "task_loss": 0.2671097218990326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7428929805755615, "epoch": 6.94, "learning_rate": 1.5278951817413357e-05, "loss": 0.5126, "step": 8215, "task_loss": 1.3264228105545044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3727587163448334, "epoch": 6.94, "learning_rate": 1.5274725274725277e-05, "loss": 0.2992, "step": 8216, "task_loss": 0.3081343173980713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41894060373306274, "epoch": 6.95, "learning_rate": 1.5270498732037193e-05, "loss": 0.4916, "step": 8217, "task_loss": 1.698621392250061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3773888349533081, "epoch": 6.95, "learning_rate": 1.5266272189349113e-05, "loss": 0.5325, "step": 8218, "task_loss": 1.126094102859497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24135497212409973, "epoch": 6.95, "learning_rate": 1.5262045646661032e-05, "loss": 0.36, "step": 8219, "task_loss": 0.14775274693965912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5234148502349854, "epoch": 6.95, "learning_rate": 1.525781910397295e-05, "loss": 0.5619, "step": 8220, "task_loss": 0.8358763456344604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6000182628631592, "epoch": 6.95, "learning_rate": 1.5253592561284869e-05, "loss": 0.4682, "step": 8221, "task_loss": 1.2217031717300415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3514356315135956, "epoch": 6.95, "learning_rate": 1.5249366018596788e-05, "loss": 0.4394, "step": 8222, "task_loss": 0.9565135836601257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5196155309677124, "epoch": 6.95, "learning_rate": 1.5245139475908706e-05, "loss": 0.4129, "step": 8223, "task_loss": 0.6463860869407654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1928631067276001, "epoch": 6.95, "learning_rate": 1.5240912933220628e-05, "loss": 0.4404, "step": 8224, "task_loss": 0.23237508535385132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44084781408309937, "epoch": 6.95, "learning_rate": 1.5236686390532546e-05, "loss": 0.427, "step": 8225, "task_loss": 0.04840904846787453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4103754758834839, "epoch": 6.95, "learning_rate": 1.5232459847844462e-05, "loss": 0.4236, "step": 8226, "task_loss": 0.3935781419277191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5178766846656799, "epoch": 6.95, "learning_rate": 1.5228233305156384e-05, "loss": 0.5063, "step": 8227, "task_loss": 0.8814098834991455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.258184015750885, "epoch": 6.95, "learning_rate": 1.5224006762468302e-05, "loss": 0.2491, "step": 8228, "task_loss": 0.09820559620857239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36516836285591125, "epoch": 6.96, "learning_rate": 1.521978021978022e-05, "loss": 0.4404, "step": 8229, "task_loss": 0.17960189282894135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5361000895500183, "epoch": 6.96, "learning_rate": 1.521555367709214e-05, "loss": 0.6251, "step": 8230, "task_loss": 0.46445783972740173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5012030601501465, "epoch": 6.96, "learning_rate": 1.5211327134404057e-05, "loss": 0.3734, "step": 8231, "task_loss": 1.2340478897094727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2361038774251938, "epoch": 6.96, "learning_rate": 1.5207100591715979e-05, "loss": 0.3507, "step": 8232, "task_loss": 0.4812510311603546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31439924240112305, "epoch": 6.96, "learning_rate": 1.5202874049027895e-05, "loss": 0.3039, "step": 8233, "task_loss": 0.708295464515686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6341074705123901, "epoch": 6.96, "learning_rate": 1.5198647506339813e-05, "loss": 0.484, "step": 8234, "task_loss": 1.037630558013916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6677741408348083, "epoch": 6.96, "learning_rate": 1.5194420963651735e-05, "loss": 0.5158, "step": 8235, "task_loss": 0.7357756495475769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18401606380939484, "epoch": 6.96, "learning_rate": 1.5190194420963653e-05, "loss": 0.3939, "step": 8236, "task_loss": 0.5122273564338684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42348361015319824, "epoch": 6.96, "learning_rate": 1.5185967878275573e-05, "loss": 0.3968, "step": 8237, "task_loss": 0.9195177555084229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47977378964424133, "epoch": 6.96, "learning_rate": 1.518174133558749e-05, "loss": 0.4024, "step": 8238, "task_loss": 0.5926809906959534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4956817626953125, "epoch": 6.96, "learning_rate": 1.5177514792899409e-05, "loss": 0.4297, "step": 8239, "task_loss": 0.5607907772064209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.454342782497406, "epoch": 6.96, "learning_rate": 1.5173288250211328e-05, "loss": 0.4335, "step": 8240, "task_loss": 0.32166731357574463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6324416399002075, "epoch": 6.97, "learning_rate": 1.5169061707523246e-05, "loss": 0.4456, "step": 8241, "task_loss": 0.3131512701511383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2104112207889557, "epoch": 6.97, "learning_rate": 1.5164835164835164e-05, "loss": 0.2718, "step": 8242, "task_loss": 0.14100228250026703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27196985483169556, "epoch": 6.97, "learning_rate": 1.5160608622147084e-05, "loss": 0.4041, "step": 8243, "task_loss": 0.32193055748939514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32567691802978516, "epoch": 6.97, "learning_rate": 1.5156382079459002e-05, "loss": 0.3799, "step": 8244, "task_loss": 0.7688029408454895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31179162859916687, "epoch": 6.97, "learning_rate": 1.5152155536770924e-05, "loss": 0.3678, "step": 8245, "task_loss": 0.6757123470306396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32105210423469543, "epoch": 6.97, "learning_rate": 1.5147928994082842e-05, "loss": 0.4709, "step": 8246, "task_loss": 0.4627591669559479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36061960458755493, "epoch": 6.97, "learning_rate": 1.514370245139476e-05, "loss": 0.3984, "step": 8247, "task_loss": 0.561049222946167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3668675422668457, "epoch": 6.97, "learning_rate": 1.513947590870668e-05, "loss": 0.4749, "step": 8248, "task_loss": 0.8205997347831726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29102611541748047, "epoch": 6.97, "learning_rate": 1.5135249366018598e-05, "loss": 0.3436, "step": 8249, "task_loss": 0.5333042144775391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7841709852218628, "epoch": 6.97, "learning_rate": 1.5131022823330516e-05, "loss": 0.5745, "step": 8250, "task_loss": 1.2410589456558228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3313364088535309, "epoch": 6.97, "learning_rate": 1.5126796280642435e-05, "loss": 0.391, "step": 8251, "task_loss": 0.29525822401046753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2859940826892853, "epoch": 6.97, "learning_rate": 1.5122569737954353e-05, "loss": 0.453, "step": 8252, "task_loss": 1.2679792642593384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29265299439430237, "epoch": 6.98, "learning_rate": 1.5118343195266275e-05, "loss": 0.332, "step": 8253, "task_loss": 0.7017331719398499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2722637355327606, "epoch": 6.98, "learning_rate": 1.5114116652578191e-05, "loss": 0.4774, "step": 8254, "task_loss": 0.4859996438026428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4201558232307434, "epoch": 6.98, "learning_rate": 1.510989010989011e-05, "loss": 0.4082, "step": 8255, "task_loss": 0.4891192317008972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36740803718566895, "epoch": 6.98, "learning_rate": 1.510566356720203e-05, "loss": 0.3569, "step": 8256, "task_loss": 0.5057170391082764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3489178717136383, "epoch": 6.98, "learning_rate": 1.5101437024513949e-05, "loss": 0.3125, "step": 8257, "task_loss": 0.30894824862480164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25977516174316406, "epoch": 6.98, "learning_rate": 1.5097210481825865e-05, "loss": 0.3352, "step": 8258, "task_loss": 0.7753267884254456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4740365743637085, "epoch": 6.98, "learning_rate": 1.5092983939137786e-05, "loss": 0.5811, "step": 8259, "task_loss": 1.9040555953979492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33003661036491394, "epoch": 6.98, "learning_rate": 1.5088757396449705e-05, "loss": 0.4145, "step": 8260, "task_loss": 0.4096348285675049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3292182981967926, "epoch": 6.98, "learning_rate": 1.5084530853761624e-05, "loss": 0.471, "step": 8261, "task_loss": 0.49542826414108276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5234651565551758, "epoch": 6.98, "learning_rate": 1.5080304311073542e-05, "loss": 0.512, "step": 8262, "task_loss": 0.5969032049179077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36772823333740234, "epoch": 6.98, "learning_rate": 1.507607776838546e-05, "loss": 0.3241, "step": 8263, "task_loss": 0.08594870567321777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7351909875869751, "epoch": 6.99, "learning_rate": 1.5071851225697382e-05, "loss": 0.5296, "step": 8264, "task_loss": 0.7577266693115234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3588067591190338, "epoch": 6.99, "learning_rate": 1.5067624683009298e-05, "loss": 0.4477, "step": 8265, "task_loss": 1.1385146379470825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3751617670059204, "epoch": 6.99, "learning_rate": 1.506339814032122e-05, "loss": 0.4245, "step": 8266, "task_loss": 0.3227076232433319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4811413884162903, "epoch": 6.99, "learning_rate": 1.5059171597633138e-05, "loss": 0.4525, "step": 8267, "task_loss": 1.201701045036316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5206922292709351, "epoch": 6.99, "learning_rate": 1.5054945054945056e-05, "loss": 0.4455, "step": 8268, "task_loss": 1.453420639038086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1632324755191803, "epoch": 6.99, "learning_rate": 1.5050718512256975e-05, "loss": 0.3331, "step": 8269, "task_loss": 0.027013765648007393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5707831382751465, "epoch": 6.99, "learning_rate": 1.5046491969568893e-05, "loss": 0.4053, "step": 8270, "task_loss": 0.11289359629154205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4111563563346863, "epoch": 6.99, "learning_rate": 1.5042265426880811e-05, "loss": 0.3912, "step": 8271, "task_loss": 0.9436833262443542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7225178480148315, "epoch": 6.99, "learning_rate": 1.5038038884192731e-05, "loss": 0.465, "step": 8272, "task_loss": 0.5543030500411987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3046078085899353, "epoch": 6.99, "learning_rate": 1.503381234150465e-05, "loss": 0.3233, "step": 8273, "task_loss": 0.2614608108997345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3342648148536682, "epoch": 6.99, "learning_rate": 1.502958579881657e-05, "loss": 0.408, "step": 8274, "task_loss": 0.34398412704467773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3740657567977905, "epoch": 6.99, "learning_rate": 1.5025359256128487e-05, "loss": 0.387, "step": 8275, "task_loss": 0.8366837501525879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38045695424079895, "epoch": 7.0, "learning_rate": 1.5021132713440405e-05, "loss": 0.4089, "step": 8276, "task_loss": 1.1091407537460327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2700023353099823, "epoch": 7.0, "learning_rate": 1.5016906170752327e-05, "loss": 0.4341, "step": 8277, "task_loss": 0.7705312967300415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5198190212249756, "epoch": 7.0, "learning_rate": 1.5012679628064245e-05, "loss": 0.4332, "step": 8278, "task_loss": 0.9903658032417297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24078455567359924, "epoch": 7.0, "learning_rate": 1.5008453085376163e-05, "loss": 0.3135, "step": 8279, "task_loss": 0.14397670328617096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7521635293960571, "epoch": 7.0, "learning_rate": 1.5004226542688082e-05, "loss": 0.4739, "step": 8280, "task_loss": 0.29760488867759705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28123408555984497, "epoch": 7.0, "learning_rate": 1.5e-05, "loss": 0.4135, "step": 8281, "task_loss": 0.2527936100959778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4413542151451111, "epoch": 7.0, "learning_rate": 1.499577345731192e-05, "loss": 0.6427, "step": 8282, "task_loss": 0.5521907210350037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46478211879730225, "epoch": 7.0, "learning_rate": 1.4991546914623838e-05, "loss": 0.3155, "step": 8283, "task_loss": 0.31751516461372375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2643257677555084, "epoch": 7.0, "learning_rate": 1.4987320371935756e-05, "loss": 0.3724, "step": 8284, "task_loss": 0.4382242262363434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39968597888946533, "epoch": 7.0, "learning_rate": 1.4983093829247678e-05, "loss": 0.4576, "step": 8285, "task_loss": 0.6146531701087952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35490909218788147, "epoch": 7.0, "learning_rate": 1.4978867286559594e-05, "loss": 0.4603, "step": 8286, "task_loss": 0.6384093165397644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36409473419189453, "epoch": 7.01, "learning_rate": 1.4974640743871512e-05, "loss": 0.3614, "step": 8287, "task_loss": 0.720887303352356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45239415764808655, "epoch": 7.01, "learning_rate": 1.4970414201183433e-05, "loss": 0.4816, "step": 8288, "task_loss": 0.9364504814147949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5099371671676636, "epoch": 7.01, "learning_rate": 1.4966187658495352e-05, "loss": 0.4192, "step": 8289, "task_loss": 0.6691038012504578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36551812291145325, "epoch": 7.01, "learning_rate": 1.4961961115807271e-05, "loss": 0.3389, "step": 8290, "task_loss": 0.7291792035102844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.574684739112854, "epoch": 7.01, "learning_rate": 1.495773457311919e-05, "loss": 0.4111, "step": 8291, "task_loss": 0.5037595629692078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2524784803390503, "epoch": 7.01, "learning_rate": 1.4953508030431107e-05, "loss": 0.4496, "step": 8292, "task_loss": 0.6400421261787415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23396465182304382, "epoch": 7.01, "learning_rate": 1.4949281487743027e-05, "loss": 0.2952, "step": 8293, "task_loss": 0.20518313348293304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3309219181537628, "epoch": 7.01, "learning_rate": 1.4945054945054945e-05, "loss": 0.377, "step": 8294, "task_loss": 1.1114895343780518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4212479889392853, "epoch": 7.01, "learning_rate": 1.4940828402366867e-05, "loss": 0.4089, "step": 8295, "task_loss": 0.11287232488393784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24975335597991943, "epoch": 7.01, "learning_rate": 1.4936601859678785e-05, "loss": 0.408, "step": 8296, "task_loss": 0.24343259632587433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45902740955352783, "epoch": 7.01, "learning_rate": 1.4932375316990701e-05, "loss": 0.4378, "step": 8297, "task_loss": 0.9787672162055969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26421287655830383, "epoch": 7.01, "learning_rate": 1.4928148774302622e-05, "loss": 0.3334, "step": 8298, "task_loss": 0.190673828125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41736918687820435, "epoch": 7.02, "learning_rate": 1.492392223161454e-05, "loss": 0.4539, "step": 8299, "task_loss": 0.8382971882820129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5746397972106934, "epoch": 7.02, "learning_rate": 1.4919695688926458e-05, "loss": 0.4608, "step": 8300, "task_loss": 0.9622389078140259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3626786470413208, "epoch": 7.02, "learning_rate": 1.4915469146238378e-05, "loss": 0.4559, "step": 8301, "task_loss": 0.8607772588729858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8122441172599792, "epoch": 7.02, "learning_rate": 1.4911242603550296e-05, "loss": 0.4078, "step": 8302, "task_loss": 0.15627838671207428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6141949892044067, "epoch": 7.02, "learning_rate": 1.4907016060862216e-05, "loss": 0.475, "step": 8303, "task_loss": 1.0471584796905518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2582205533981323, "epoch": 7.02, "learning_rate": 1.4902789518174134e-05, "loss": 0.4039, "step": 8304, "task_loss": 1.020057201385498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 1.1612155437469482, "epoch": 7.02, "learning_rate": 1.4898562975486052e-05, "loss": 0.6594, "step": 8305, "task_loss": 1.2823190689086914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45852434635162354, "epoch": 7.02, "learning_rate": 1.4894336432797974e-05, "loss": 0.4157, "step": 8306, "task_loss": 0.6477389335632324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2611725330352783, "epoch": 7.02, "learning_rate": 1.489010989010989e-05, "loss": 0.4112, "step": 8307, "task_loss": 0.11670973151922226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2922886610031128, "epoch": 7.02, "learning_rate": 1.4885883347421808e-05, "loss": 0.4607, "step": 8308, "task_loss": 0.5235911011695862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20869910717010498, "epoch": 7.02, "learning_rate": 1.488165680473373e-05, "loss": 0.3471, "step": 8309, "task_loss": 0.1230420172214508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4804826080799103, "epoch": 7.02, "learning_rate": 1.4877430262045647e-05, "loss": 0.3813, "step": 8310, "task_loss": 0.37991058826446533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2190600484609604, "epoch": 7.03, "learning_rate": 1.4873203719357567e-05, "loss": 0.3699, "step": 8311, "task_loss": 0.8183318376541138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5728894472122192, "epoch": 7.03, "learning_rate": 1.4868977176669485e-05, "loss": 0.4008, "step": 8312, "task_loss": 1.2204796075820923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49801915884017944, "epoch": 7.03, "learning_rate": 1.4864750633981403e-05, "loss": 0.5616, "step": 8313, "task_loss": 1.0254170894622803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4623652994632721, "epoch": 7.03, "learning_rate": 1.4860524091293323e-05, "loss": 0.3879, "step": 8314, "task_loss": 1.3507338762283325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3845575153827667, "epoch": 7.03, "learning_rate": 1.4856297548605241e-05, "loss": 0.5037, "step": 8315, "task_loss": 1.0278867483139038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3353651165962219, "epoch": 7.03, "learning_rate": 1.4852071005917159e-05, "loss": 0.3757, "step": 8316, "task_loss": 0.23556576669216156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34871047735214233, "epoch": 7.03, "learning_rate": 1.484784446322908e-05, "loss": 0.4148, "step": 8317, "task_loss": 0.43021389842033386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22020882368087769, "epoch": 7.03, "learning_rate": 1.4843617920540997e-05, "loss": 0.2869, "step": 8318, "task_loss": 0.2829124927520752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3685554265975952, "epoch": 7.03, "learning_rate": 1.4839391377852918e-05, "loss": 0.3395, "step": 8319, "task_loss": 0.4097379744052887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3138637840747833, "epoch": 7.03, "learning_rate": 1.4835164835164836e-05, "loss": 0.4665, "step": 8320, "task_loss": 0.4797903597354889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4466157853603363, "epoch": 7.03, "learning_rate": 1.4830938292476754e-05, "loss": 0.4033, "step": 8321, "task_loss": 0.6313538551330566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49122291803359985, "epoch": 7.03, "learning_rate": 1.4826711749788674e-05, "loss": 0.4366, "step": 8322, "task_loss": 0.5350803732872009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6044434309005737, "epoch": 7.04, "learning_rate": 1.4822485207100592e-05, "loss": 0.5513, "step": 8323, "task_loss": 0.7601437568664551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1923898309469223, "epoch": 7.04, "learning_rate": 1.4818258664412512e-05, "loss": 0.3461, "step": 8324, "task_loss": 0.09433046728372574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5911145210266113, "epoch": 7.04, "learning_rate": 1.481403212172443e-05, "loss": 0.5336, "step": 8325, "task_loss": 0.2179163098335266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2999769151210785, "epoch": 7.04, "learning_rate": 1.4809805579036348e-05, "loss": 0.3369, "step": 8326, "task_loss": 0.18105952441692352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32190385460853577, "epoch": 7.04, "learning_rate": 1.480557903634827e-05, "loss": 0.4078, "step": 8327, "task_loss": 0.7043678164482117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38484692573547363, "epoch": 7.04, "learning_rate": 1.4801352493660187e-05, "loss": 0.4162, "step": 8328, "task_loss": 0.25875455141067505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3129620850086212, "epoch": 7.04, "learning_rate": 1.4797125950972104e-05, "loss": 0.4042, "step": 8329, "task_loss": 0.20342200994491577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.556844174861908, "epoch": 7.04, "learning_rate": 1.4792899408284025e-05, "loss": 0.3933, "step": 8330, "task_loss": 0.3725518584251404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3196388781070709, "epoch": 7.04, "learning_rate": 1.4788672865595943e-05, "loss": 0.4744, "step": 8331, "task_loss": 0.9997777342796326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33940571546554565, "epoch": 7.04, "learning_rate": 1.4784446322907863e-05, "loss": 0.4123, "step": 8332, "task_loss": 0.48851415514945984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31779640913009644, "epoch": 7.04, "learning_rate": 1.4780219780219781e-05, "loss": 0.3807, "step": 8333, "task_loss": 0.24944418668746948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4143342971801758, "epoch": 7.04, "learning_rate": 1.4775993237531699e-05, "loss": 0.3972, "step": 8334, "task_loss": 0.5147993564605713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39723363518714905, "epoch": 7.05, "learning_rate": 1.4771766694843619e-05, "loss": 0.4624, "step": 8335, "task_loss": 1.3776476383209229 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5259736776351929, "epoch": 7.05, "learning_rate": 1.4767540152155537e-05, "loss": 0.4428, "step": 8336, "task_loss": 1.361332654953003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5901424884796143, "epoch": 7.05, "learning_rate": 1.4763313609467455e-05, "loss": 0.4203, "step": 8337, "task_loss": 0.3991377651691437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2562532424926758, "epoch": 7.05, "learning_rate": 1.4759087066779376e-05, "loss": 0.3027, "step": 8338, "task_loss": 0.2564619779586792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8435754776000977, "epoch": 7.05, "learning_rate": 1.4754860524091293e-05, "loss": 0.5915, "step": 8339, "task_loss": 1.1529139280319214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2510181665420532, "epoch": 7.05, "learning_rate": 1.4750633981403214e-05, "loss": 0.3461, "step": 8340, "task_loss": 0.0932350903749466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44214338064193726, "epoch": 7.05, "learning_rate": 1.4746407438715132e-05, "loss": 0.3211, "step": 8341, "task_loss": 0.5769551396369934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4646173119544983, "epoch": 7.05, "learning_rate": 1.474218089602705e-05, "loss": 0.4324, "step": 8342, "task_loss": 0.6291733980178833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3288612365722656, "epoch": 7.05, "learning_rate": 1.473795435333897e-05, "loss": 0.4527, "step": 8343, "task_loss": 0.5451481342315674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24552994966506958, "epoch": 7.05, "learning_rate": 1.4733727810650888e-05, "loss": 0.3535, "step": 8344, "task_loss": 0.557092010974884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4155973494052887, "epoch": 7.05, "learning_rate": 1.4729501267962806e-05, "loss": 0.4328, "step": 8345, "task_loss": 1.024409532546997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1909499615430832, "epoch": 7.05, "learning_rate": 1.4725274725274726e-05, "loss": 0.4025, "step": 8346, "task_loss": 0.15733684599399567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40135157108306885, "epoch": 7.06, "learning_rate": 1.4721048182586644e-05, "loss": 0.4593, "step": 8347, "task_loss": 0.6721363067626953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3227062523365021, "epoch": 7.06, "learning_rate": 1.4716821639898565e-05, "loss": 0.3324, "step": 8348, "task_loss": 0.661353588104248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6425338387489319, "epoch": 7.06, "learning_rate": 1.4712595097210483e-05, "loss": 0.5286, "step": 8349, "task_loss": 0.9753245115280151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27931398153305054, "epoch": 7.06, "learning_rate": 1.47083685545224e-05, "loss": 0.3629, "step": 8350, "task_loss": 0.4048572778701782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40779945254325867, "epoch": 7.06, "learning_rate": 1.4704142011834321e-05, "loss": 0.411, "step": 8351, "task_loss": 1.1448734998703003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5855027437210083, "epoch": 7.06, "learning_rate": 1.4699915469146239e-05, "loss": 0.346, "step": 8352, "task_loss": 0.46638232469558716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43462449312210083, "epoch": 7.06, "learning_rate": 1.4695688926458159e-05, "loss": 0.3666, "step": 8353, "task_loss": 0.7084907293319702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.395760715007782, "epoch": 7.06, "learning_rate": 1.4691462383770077e-05, "loss": 0.3936, "step": 8354, "task_loss": 0.5238751769065857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.505798876285553, "epoch": 7.06, "learning_rate": 1.4687235841081995e-05, "loss": 0.3663, "step": 8355, "task_loss": 0.529261589050293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29745352268218994, "epoch": 7.06, "learning_rate": 1.4683009298393915e-05, "loss": 0.3556, "step": 8356, "task_loss": 0.4951339662075043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2606540024280548, "epoch": 7.06, "learning_rate": 1.4678782755705833e-05, "loss": 0.5152, "step": 8357, "task_loss": 1.1197713613510132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3915786147117615, "epoch": 7.07, "learning_rate": 1.467455621301775e-05, "loss": 0.559, "step": 8358, "task_loss": 0.6039315462112427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48208317160606384, "epoch": 7.07, "learning_rate": 1.4670329670329672e-05, "loss": 0.5084, "step": 8359, "task_loss": 0.703864574432373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3494284749031067, "epoch": 7.07, "learning_rate": 1.466610312764159e-05, "loss": 0.3168, "step": 8360, "task_loss": 0.591950535774231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5578014850616455, "epoch": 7.07, "learning_rate": 1.466187658495351e-05, "loss": 0.3459, "step": 8361, "task_loss": 0.8674119114875793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3628447651863098, "epoch": 7.07, "learning_rate": 1.4657650042265428e-05, "loss": 0.3516, "step": 8362, "task_loss": 0.5179589986801147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3679960370063782, "epoch": 7.07, "learning_rate": 1.4653423499577346e-05, "loss": 0.4046, "step": 8363, "task_loss": 0.37846291065216064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29077625274658203, "epoch": 7.07, "learning_rate": 1.4649196956889266e-05, "loss": 0.4831, "step": 8364, "task_loss": 0.55636066198349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22993029654026031, "epoch": 7.07, "learning_rate": 1.4644970414201184e-05, "loss": 0.3161, "step": 8365, "task_loss": 0.5594084858894348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5078344345092773, "epoch": 7.07, "learning_rate": 1.4640743871513102e-05, "loss": 0.3597, "step": 8366, "task_loss": 0.3144093453884125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21866194903850555, "epoch": 7.07, "learning_rate": 1.4636517328825022e-05, "loss": 0.2842, "step": 8367, "task_loss": 0.03512582555413246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6771223545074463, "epoch": 7.07, "learning_rate": 1.463229078613694e-05, "loss": 0.4666, "step": 8368, "task_loss": 1.0261646509170532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30612093210220337, "epoch": 7.07, "learning_rate": 1.4628064243448861e-05, "loss": 0.423, "step": 8369, "task_loss": 0.08478358387947083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2628135085105896, "epoch": 7.08, "learning_rate": 1.462383770076078e-05, "loss": 0.5317, "step": 8370, "task_loss": 0.658598780632019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4806727170944214, "epoch": 7.08, "learning_rate": 1.4619611158072696e-05, "loss": 0.3946, "step": 8371, "task_loss": 0.674254834651947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3670101463794708, "epoch": 7.08, "learning_rate": 1.4615384615384617e-05, "loss": 0.4243, "step": 8372, "task_loss": 1.2578107118606567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3100663423538208, "epoch": 7.08, "learning_rate": 1.4611158072696535e-05, "loss": 0.4452, "step": 8373, "task_loss": 0.8208237886428833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43752744793891907, "epoch": 7.08, "learning_rate": 1.4606931530008453e-05, "loss": 0.3856, "step": 8374, "task_loss": 0.16295449435710907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3186638653278351, "epoch": 7.08, "learning_rate": 1.4602704987320373e-05, "loss": 0.4149, "step": 8375, "task_loss": 0.2676573395729065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.343463659286499, "epoch": 7.08, "learning_rate": 1.459847844463229e-05, "loss": 0.3776, "step": 8376, "task_loss": 1.2625263929367065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39731496572494507, "epoch": 7.08, "learning_rate": 1.4594251901944212e-05, "loss": 0.3956, "step": 8377, "task_loss": 0.41253435611724854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3758246898651123, "epoch": 7.08, "learning_rate": 1.4590025359256129e-05, "loss": 0.4325, "step": 8378, "task_loss": 0.647294282913208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32561200857162476, "epoch": 7.08, "learning_rate": 1.4585798816568047e-05, "loss": 0.3627, "step": 8379, "task_loss": 0.5770902037620544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5017141103744507, "epoch": 7.08, "learning_rate": 1.4581572273879968e-05, "loss": 0.4986, "step": 8380, "task_loss": 2.031759262084961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3875226378440857, "epoch": 7.08, "learning_rate": 1.4577345731191886e-05, "loss": 0.3372, "step": 8381, "task_loss": 0.6885057091712952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4345259666442871, "epoch": 7.09, "learning_rate": 1.4573119188503802e-05, "loss": 0.4647, "step": 8382, "task_loss": 0.7620514035224915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26603513956069946, "epoch": 7.09, "learning_rate": 1.4568892645815724e-05, "loss": 0.4349, "step": 8383, "task_loss": 0.5241237282752991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.507420003414154, "epoch": 7.09, "learning_rate": 1.4564666103127642e-05, "loss": 0.3472, "step": 8384, "task_loss": 0.7135237455368042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.521060585975647, "epoch": 7.09, "learning_rate": 1.4560439560439562e-05, "loss": 0.4545, "step": 8385, "task_loss": 0.6666771769523621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3788931965827942, "epoch": 7.09, "learning_rate": 1.455621301775148e-05, "loss": 0.4404, "step": 8386, "task_loss": 0.45320555567741394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6414828300476074, "epoch": 7.09, "learning_rate": 1.4551986475063398e-05, "loss": 0.4703, "step": 8387, "task_loss": 0.5006552338600159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31551817059516907, "epoch": 7.09, "learning_rate": 1.4547759932375318e-05, "loss": 0.5051, "step": 8388, "task_loss": 0.8855440020561218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27681753039360046, "epoch": 7.09, "learning_rate": 1.4543533389687236e-05, "loss": 0.3167, "step": 8389, "task_loss": 1.2749816179275513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4604836702346802, "epoch": 7.09, "learning_rate": 1.4539306846999157e-05, "loss": 0.4733, "step": 8390, "task_loss": 0.7168909907341003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29975900053977966, "epoch": 7.09, "learning_rate": 1.4535080304311075e-05, "loss": 0.394, "step": 8391, "task_loss": 0.592528223991394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37415575981140137, "epoch": 7.09, "learning_rate": 1.4530853761622993e-05, "loss": 0.3915, "step": 8392, "task_loss": 0.7118204832077026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3613831698894501, "epoch": 7.09, "learning_rate": 1.4526627218934913e-05, "loss": 0.4351, "step": 8393, "task_loss": 0.36225467920303345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3314611613750458, "epoch": 7.1, "learning_rate": 1.4522400676246831e-05, "loss": 0.42, "step": 8394, "task_loss": 0.6385670900344849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39198702573776245, "epoch": 7.1, "learning_rate": 1.4518174133558749e-05, "loss": 0.3671, "step": 8395, "task_loss": 0.19258631765842438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5684965252876282, "epoch": 7.1, "learning_rate": 1.4513947590870669e-05, "loss": 0.4379, "step": 8396, "task_loss": 0.8100693225860596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44245094060897827, "epoch": 7.1, "learning_rate": 1.4509721048182587e-05, "loss": 0.4298, "step": 8397, "task_loss": 0.8396496176719666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3851650357246399, "epoch": 7.1, "learning_rate": 1.4505494505494508e-05, "loss": 0.3718, "step": 8398, "task_loss": 1.311450481414795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2873406708240509, "epoch": 7.1, "learning_rate": 1.4501267962806425e-05, "loss": 0.3326, "step": 8399, "task_loss": 0.7790699005126953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25097569823265076, "epoch": 7.1, "learning_rate": 1.4497041420118343e-05, "loss": 0.4242, "step": 8400, "task_loss": 0.4699631929397583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4071921706199646, "epoch": 7.1, "learning_rate": 1.4492814877430264e-05, "loss": 0.3151, "step": 8401, "task_loss": 0.40444299578666687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4477057456970215, "epoch": 7.1, "learning_rate": 1.4488588334742182e-05, "loss": 0.4014, "step": 8402, "task_loss": 0.34798693656921387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21141371130943298, "epoch": 7.1, "learning_rate": 1.4484361792054098e-05, "loss": 0.3206, "step": 8403, "task_loss": 0.2571597397327423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3580038547515869, "epoch": 7.1, "learning_rate": 1.448013524936602e-05, "loss": 0.4085, "step": 8404, "task_loss": 0.24061186611652374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33964747190475464, "epoch": 7.1, "learning_rate": 1.4475908706677938e-05, "loss": 0.3618, "step": 8405, "task_loss": 0.7125963568687439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46652156114578247, "epoch": 7.11, "learning_rate": 1.4471682163989858e-05, "loss": 0.5561, "step": 8406, "task_loss": 0.43365633487701416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43037891387939453, "epoch": 7.11, "learning_rate": 1.4467455621301776e-05, "loss": 0.4245, "step": 8407, "task_loss": 1.1133359670639038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.576919436454773, "epoch": 7.11, "learning_rate": 1.4463229078613694e-05, "loss": 0.4285, "step": 8408, "task_loss": 1.2657321691513062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26890116930007935, "epoch": 7.11, "learning_rate": 1.4459002535925615e-05, "loss": 0.373, "step": 8409, "task_loss": 0.5192141532897949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25023606419563293, "epoch": 7.11, "learning_rate": 1.4454775993237531e-05, "loss": 0.2288, "step": 8410, "task_loss": 0.37080201506614685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26618292927742004, "epoch": 7.11, "learning_rate": 1.445054945054945e-05, "loss": 0.3846, "step": 8411, "task_loss": 0.5393050312995911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3965601623058319, "epoch": 7.11, "learning_rate": 1.4446322907861371e-05, "loss": 0.3517, "step": 8412, "task_loss": 0.3262026607990265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7163786888122559, "epoch": 7.11, "learning_rate": 1.4442096365173289e-05, "loss": 0.4877, "step": 8413, "task_loss": 1.8616886138916016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.779911994934082, "epoch": 7.11, "learning_rate": 1.4437869822485209e-05, "loss": 0.4698, "step": 8414, "task_loss": 0.9641918540000916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2862231731414795, "epoch": 7.11, "learning_rate": 1.4433643279797127e-05, "loss": 0.3652, "step": 8415, "task_loss": 0.46664008498191833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25905841588974, "epoch": 7.11, "learning_rate": 1.4429416737109045e-05, "loss": 0.3587, "step": 8416, "task_loss": 0.2576560974121094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.328513503074646, "epoch": 7.11, "learning_rate": 1.4425190194420965e-05, "loss": 0.4159, "step": 8417, "task_loss": 0.9867499470710754 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3979632556438446, "epoch": 7.12, "learning_rate": 1.4420963651732883e-05, "loss": 0.3711, "step": 8418, "task_loss": 0.44069308042526245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3352411985397339, "epoch": 7.12, "learning_rate": 1.4416737109044804e-05, "loss": 0.4819, "step": 8419, "task_loss": 0.5708295106887817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4830598533153534, "epoch": 7.12, "learning_rate": 1.441251056635672e-05, "loss": 0.3979, "step": 8420, "task_loss": 0.29789814352989197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6309598684310913, "epoch": 7.12, "learning_rate": 1.4408284023668638e-05, "loss": 0.4275, "step": 8421, "task_loss": 1.065609097480774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5073386430740356, "epoch": 7.12, "learning_rate": 1.440405748098056e-05, "loss": 0.4938, "step": 8422, "task_loss": 1.0641427040100098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41760551929473877, "epoch": 7.12, "learning_rate": 1.4399830938292478e-05, "loss": 0.3676, "step": 8423, "task_loss": 1.1484755277633667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29046791791915894, "epoch": 7.12, "learning_rate": 1.4395604395604396e-05, "loss": 0.3881, "step": 8424, "task_loss": 0.41065356135368347 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27040597796440125, "epoch": 7.12, "learning_rate": 1.4391377852916316e-05, "loss": 0.4059, "step": 8425, "task_loss": 1.3025606870651245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2354079782962799, "epoch": 7.12, "learning_rate": 1.4387151310228234e-05, "loss": 0.4741, "step": 8426, "task_loss": 0.25394389033317566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4093613922595978, "epoch": 7.12, "learning_rate": 1.4382924767540153e-05, "loss": 0.3411, "step": 8427, "task_loss": 0.3733874559402466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19723108410835266, "epoch": 7.12, "learning_rate": 1.4378698224852072e-05, "loss": 0.5138, "step": 8428, "task_loss": 0.60390305519104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4218757152557373, "epoch": 7.13, "learning_rate": 1.437447168216399e-05, "loss": 0.4909, "step": 8429, "task_loss": 0.5015130043029785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28441160917282104, "epoch": 7.13, "learning_rate": 1.4370245139475911e-05, "loss": 0.4701, "step": 8430, "task_loss": 0.6278700828552246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45802924036979675, "epoch": 7.13, "learning_rate": 1.4366018596787827e-05, "loss": 0.4951, "step": 8431, "task_loss": 0.5403968095779419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4281238317489624, "epoch": 7.13, "learning_rate": 1.4361792054099745e-05, "loss": 0.3258, "step": 8432, "task_loss": 0.6291800141334534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5249758362770081, "epoch": 7.13, "learning_rate": 1.4357565511411667e-05, "loss": 0.3574, "step": 8433, "task_loss": 0.6581066846847534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40616482496261597, "epoch": 7.13, "learning_rate": 1.4353338968723585e-05, "loss": 0.3343, "step": 8434, "task_loss": 0.7830513119697571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28221940994262695, "epoch": 7.13, "learning_rate": 1.4349112426035505e-05, "loss": 0.4968, "step": 8435, "task_loss": 0.3690681755542755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45464053750038147, "epoch": 7.13, "learning_rate": 1.4344885883347423e-05, "loss": 0.4114, "step": 8436, "task_loss": 0.5992862582206726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26880329847335815, "epoch": 7.13, "learning_rate": 1.434065934065934e-05, "loss": 0.3347, "step": 8437, "task_loss": 0.8009253740310669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6621055603027344, "epoch": 7.13, "learning_rate": 1.433643279797126e-05, "loss": 0.557, "step": 8438, "task_loss": 0.5487411022186279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20455482602119446, "epoch": 7.13, "learning_rate": 1.4332206255283178e-05, "loss": 0.3814, "step": 8439, "task_loss": 0.5300395488739014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3860228955745697, "epoch": 7.13, "learning_rate": 1.4327979712595097e-05, "loss": 0.3239, "step": 8440, "task_loss": 1.090928554534912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5255444049835205, "epoch": 7.14, "learning_rate": 1.4323753169907016e-05, "loss": 0.4216, "step": 8441, "task_loss": 0.32724031805992126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41834431886672974, "epoch": 7.14, "learning_rate": 1.4319526627218934e-05, "loss": 0.4843, "step": 8442, "task_loss": 1.2157323360443115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7163252830505371, "epoch": 7.14, "learning_rate": 1.4315300084530856e-05, "loss": 0.4413, "step": 8443, "task_loss": 0.743472158908844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5468880534172058, "epoch": 7.14, "learning_rate": 1.4311073541842774e-05, "loss": 0.5176, "step": 8444, "task_loss": 0.5528901219367981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5406476259231567, "epoch": 7.14, "learning_rate": 1.4306846999154692e-05, "loss": 0.4607, "step": 8445, "task_loss": 1.1136703491210938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20621834695339203, "epoch": 7.14, "learning_rate": 1.4302620456466612e-05, "loss": 0.3089, "step": 8446, "task_loss": 0.13167735934257507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.417783260345459, "epoch": 7.14, "learning_rate": 1.429839391377853e-05, "loss": 0.4804, "step": 8447, "task_loss": 0.5905869007110596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4282853901386261, "epoch": 7.14, "learning_rate": 1.429416737109045e-05, "loss": 0.4501, "step": 8448, "task_loss": 0.9756159782409668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45216748118400574, "epoch": 7.14, "learning_rate": 1.4289940828402367e-05, "loss": 0.261, "step": 8449, "task_loss": 1.13016676902771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4449964463710785, "epoch": 7.14, "learning_rate": 1.4285714285714285e-05, "loss": 0.3383, "step": 8450, "task_loss": 0.7582607269287109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4292390048503876, "epoch": 7.14, "learning_rate": 1.4281487743026207e-05, "loss": 0.3779, "step": 8451, "task_loss": 0.2806881368160248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3443630039691925, "epoch": 7.14, "learning_rate": 1.4277261200338123e-05, "loss": 0.4384, "step": 8452, "task_loss": 0.2718794047832489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2630396783351898, "epoch": 7.15, "learning_rate": 1.4273034657650041e-05, "loss": 0.3079, "step": 8453, "task_loss": 0.5198727250099182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.267972469329834, "epoch": 7.15, "learning_rate": 1.4268808114961963e-05, "loss": 0.4167, "step": 8454, "task_loss": 0.712524950504303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3115214705467224, "epoch": 7.15, "learning_rate": 1.426458157227388e-05, "loss": 0.3991, "step": 8455, "task_loss": 0.6736319065093994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2567656934261322, "epoch": 7.15, "learning_rate": 1.42603550295858e-05, "loss": 0.3718, "step": 8456, "task_loss": 0.3898475766181946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2431955337524414, "epoch": 7.15, "learning_rate": 1.4256128486897719e-05, "loss": 0.392, "step": 8457, "task_loss": 0.5110139846801758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6100138425827026, "epoch": 7.15, "learning_rate": 1.4251901944209637e-05, "loss": 0.4836, "step": 8458, "task_loss": 0.5632320642471313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6514887809753418, "epoch": 7.15, "learning_rate": 1.4247675401521556e-05, "loss": 0.4821, "step": 8459, "task_loss": 0.8805688619613647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21099117398262024, "epoch": 7.15, "learning_rate": 1.4243448858833474e-05, "loss": 0.3596, "step": 8460, "task_loss": 0.10741354525089264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5412999987602234, "epoch": 7.15, "learning_rate": 1.4239222316145392e-05, "loss": 0.4048, "step": 8461, "task_loss": 0.8236729502677917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5572458505630493, "epoch": 7.15, "learning_rate": 1.4234995773457314e-05, "loss": 0.3967, "step": 8462, "task_loss": 1.0607006549835205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5347450971603394, "epoch": 7.15, "learning_rate": 1.423076923076923e-05, "loss": 0.4734, "step": 8463, "task_loss": 0.3777298927307129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3408353626728058, "epoch": 7.15, "learning_rate": 1.4226542688081152e-05, "loss": 0.3535, "step": 8464, "task_loss": 0.7234189510345459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20471210777759552, "epoch": 7.16, "learning_rate": 1.422231614539307e-05, "loss": 0.3814, "step": 8465, "task_loss": 0.606413722038269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.301734983921051, "epoch": 7.16, "learning_rate": 1.4218089602704988e-05, "loss": 0.3655, "step": 8466, "task_loss": 0.08791442215442657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.232186958193779, "epoch": 7.16, "learning_rate": 1.4213863060016907e-05, "loss": 0.3861, "step": 8467, "task_loss": 0.26512786746025085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34809571504592896, "epoch": 7.16, "learning_rate": 1.4209636517328825e-05, "loss": 0.3458, "step": 8468, "task_loss": 0.28579840064048767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3929246664047241, "epoch": 7.16, "learning_rate": 1.4205409974640744e-05, "loss": 0.3479, "step": 8469, "task_loss": 0.39567896723747253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19044481217861176, "epoch": 7.16, "learning_rate": 1.4201183431952663e-05, "loss": 0.3302, "step": 8470, "task_loss": 0.47425198554992676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6478866338729858, "epoch": 7.16, "learning_rate": 1.4196956889264581e-05, "loss": 0.5911, "step": 8471, "task_loss": 0.17720377445220947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2541215419769287, "epoch": 7.16, "learning_rate": 1.4192730346576503e-05, "loss": 0.2974, "step": 8472, "task_loss": 0.1366140991449356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39000970125198364, "epoch": 7.16, "learning_rate": 1.4188503803888419e-05, "loss": 0.3192, "step": 8473, "task_loss": 0.5380995273590088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29146015644073486, "epoch": 7.16, "learning_rate": 1.4184277261200337e-05, "loss": 0.3622, "step": 8474, "task_loss": 0.7985250949859619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3090130090713501, "epoch": 7.16, "learning_rate": 1.4180050718512259e-05, "loss": 0.4192, "step": 8475, "task_loss": 0.7839860916137695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40757304430007935, "epoch": 7.16, "learning_rate": 1.4175824175824177e-05, "loss": 0.405, "step": 8476, "task_loss": 0.9707086682319641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2710322439670563, "epoch": 7.17, "learning_rate": 1.4171597633136096e-05, "loss": 0.4355, "step": 8477, "task_loss": 0.974820613861084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40213143825531006, "epoch": 7.17, "learning_rate": 1.4167371090448014e-05, "loss": 0.5155, "step": 8478, "task_loss": 1.0192489624023438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37129613757133484, "epoch": 7.17, "learning_rate": 1.4163144547759932e-05, "loss": 0.3896, "step": 8479, "task_loss": 0.14015524089336395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30047017335891724, "epoch": 7.17, "learning_rate": 1.4158918005071852e-05, "loss": 0.4828, "step": 8480, "task_loss": 0.43552565574645996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2602026164531708, "epoch": 7.17, "learning_rate": 1.415469146238377e-05, "loss": 0.3499, "step": 8481, "task_loss": 0.764029324054718 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4497409462928772, "epoch": 7.17, "learning_rate": 1.4150464919695688e-05, "loss": 0.5215, "step": 8482, "task_loss": 0.7212861180305481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5065480470657349, "epoch": 7.17, "learning_rate": 1.414623837700761e-05, "loss": 0.4278, "step": 8483, "task_loss": 0.47008541226387024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6751823425292969, "epoch": 7.17, "learning_rate": 1.4142011834319526e-05, "loss": 0.432, "step": 8484, "task_loss": 0.43745505809783936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2547656297683716, "epoch": 7.17, "learning_rate": 1.4137785291631447e-05, "loss": 0.3467, "step": 8485, "task_loss": 0.2236821949481964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19951918721199036, "epoch": 7.17, "learning_rate": 1.4133558748943366e-05, "loss": 0.2963, "step": 8486, "task_loss": 0.10158511996269226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2329501360654831, "epoch": 7.17, "learning_rate": 1.4129332206255284e-05, "loss": 0.5852, "step": 8487, "task_loss": 1.3158363103866577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4098413288593292, "epoch": 7.17, "learning_rate": 1.4125105663567203e-05, "loss": 0.4492, "step": 8488, "task_loss": 0.6435500383377075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36278852820396423, "epoch": 7.18, "learning_rate": 1.4120879120879121e-05, "loss": 0.3517, "step": 8489, "task_loss": 0.16912001371383667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23609913885593414, "epoch": 7.18, "learning_rate": 1.411665257819104e-05, "loss": 0.4015, "step": 8490, "task_loss": 0.1170438826084137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6304333209991455, "epoch": 7.18, "learning_rate": 1.4112426035502959e-05, "loss": 0.4668, "step": 8491, "task_loss": 0.8908385634422302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26012930274009705, "epoch": 7.18, "learning_rate": 1.4108199492814877e-05, "loss": 0.4018, "step": 8492, "task_loss": 0.17272405326366425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3364792466163635, "epoch": 7.18, "learning_rate": 1.4103972950126799e-05, "loss": 0.3985, "step": 8493, "task_loss": 0.27075305581092834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5751929879188538, "epoch": 7.18, "learning_rate": 1.4099746407438717e-05, "loss": 0.5474, "step": 8494, "task_loss": 1.724953532218933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2538541555404663, "epoch": 7.18, "learning_rate": 1.4095519864750633e-05, "loss": 0.337, "step": 8495, "task_loss": 0.5670956373214722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2530858516693115, "epoch": 7.18, "learning_rate": 1.4091293322062554e-05, "loss": 0.2667, "step": 8496, "task_loss": 1.0100651979446411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.13354969024658203, "epoch": 7.18, "learning_rate": 1.4087066779374472e-05, "loss": 0.4209, "step": 8497, "task_loss": 0.09509512037038803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24093709886074066, "epoch": 7.18, "learning_rate": 1.408284023668639e-05, "loss": 0.3687, "step": 8498, "task_loss": 0.05606451630592346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2583601772785187, "epoch": 7.18, "learning_rate": 1.407861369399831e-05, "loss": 0.4065, "step": 8499, "task_loss": 0.876528799533844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3202129900455475, "epoch": 7.19, "learning_rate": 1.4074387151310228e-05, "loss": 0.2448, "step": 8500, "task_loss": 0.3950749337673187 }, { "epoch": 7.19, "eval_accuracy": 0.9123564356435644, "eval_loss": 0.2746867537498474, "eval_runtime": 226.0412, "eval_samples_per_second": 111.705, "eval_steps_per_second": 0.876, "step": 8500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28804031014442444, "epoch": 7.19, "learning_rate": 1.4070160608622148e-05, "loss": 0.355, "step": 8501, "task_loss": 0.6555396914482117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20275960862636566, "epoch": 7.19, "learning_rate": 1.4065934065934066e-05, "loss": 0.3712, "step": 8502, "task_loss": 0.14004822075366974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2455885112285614, "epoch": 7.19, "learning_rate": 1.4061707523245984e-05, "loss": 0.3485, "step": 8503, "task_loss": 0.32804399728775024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4071706235408783, "epoch": 7.19, "learning_rate": 1.4057480980557906e-05, "loss": 0.4001, "step": 8504, "task_loss": 1.0248295068740845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33268868923187256, "epoch": 7.19, "learning_rate": 1.4053254437869822e-05, "loss": 0.3663, "step": 8505, "task_loss": 0.42013972997665405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5348602533340454, "epoch": 7.19, "learning_rate": 1.4049027895181743e-05, "loss": 0.3566, "step": 8506, "task_loss": 0.42829397320747375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4743642508983612, "epoch": 7.19, "learning_rate": 1.4044801352493661e-05, "loss": 0.5073, "step": 8507, "task_loss": 0.6758993864059448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5060102939605713, "epoch": 7.19, "learning_rate": 1.404057480980558e-05, "loss": 0.571, "step": 8508, "task_loss": 0.27656054496765137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4982975721359253, "epoch": 7.19, "learning_rate": 1.40363482671175e-05, "loss": 0.4394, "step": 8509, "task_loss": 0.7105655074119568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22778832912445068, "epoch": 7.19, "learning_rate": 1.4032121724429417e-05, "loss": 0.3419, "step": 8510, "task_loss": 0.7158111929893494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6181659698486328, "epoch": 7.19, "learning_rate": 1.4027895181741335e-05, "loss": 0.6052, "step": 8511, "task_loss": 2.570383310317993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48055508732795715, "epoch": 7.2, "learning_rate": 1.4023668639053255e-05, "loss": 0.3213, "step": 8512, "task_loss": 0.9995883703231812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24953579902648926, "epoch": 7.2, "learning_rate": 1.4019442096365173e-05, "loss": 0.4412, "step": 8513, "task_loss": 0.46950456500053406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5099080801010132, "epoch": 7.2, "learning_rate": 1.4015215553677094e-05, "loss": 0.5014, "step": 8514, "task_loss": 0.9196261167526245 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2941228151321411, "epoch": 7.2, "learning_rate": 1.4010989010989013e-05, "loss": 0.3823, "step": 8515, "task_loss": 0.4850535988807678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2971624433994293, "epoch": 7.2, "learning_rate": 1.4006762468300929e-05, "loss": 0.3671, "step": 8516, "task_loss": 0.32122349739074707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4055556654930115, "epoch": 7.2, "learning_rate": 1.400253592561285e-05, "loss": 0.4287, "step": 8517, "task_loss": 0.519234299659729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20970256626605988, "epoch": 7.2, "learning_rate": 1.3998309382924768e-05, "loss": 0.3326, "step": 8518, "task_loss": 0.03902333602309227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3713664412498474, "epoch": 7.2, "learning_rate": 1.3994082840236686e-05, "loss": 0.4008, "step": 8519, "task_loss": 0.14999502897262573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37278780341148376, "epoch": 7.2, "learning_rate": 1.3989856297548606e-05, "loss": 0.3808, "step": 8520, "task_loss": 0.343439519405365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28966426849365234, "epoch": 7.2, "learning_rate": 1.3985629754860524e-05, "loss": 0.3902, "step": 8521, "task_loss": 0.35030561685562134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2308727204799652, "epoch": 7.2, "learning_rate": 1.3981403212172444e-05, "loss": 0.404, "step": 8522, "task_loss": 0.28466206789016724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4016810953617096, "epoch": 7.2, "learning_rate": 1.3977176669484362e-05, "loss": 0.3862, "step": 8523, "task_loss": 0.39856624603271484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3083394169807434, "epoch": 7.21, "learning_rate": 1.397295012679628e-05, "loss": 0.3712, "step": 8524, "task_loss": 0.6056357622146606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2287510633468628, "epoch": 7.21, "learning_rate": 1.3968723584108201e-05, "loss": 0.3241, "step": 8525, "task_loss": 0.556786060333252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2634776532649994, "epoch": 7.21, "learning_rate": 1.396449704142012e-05, "loss": 0.3582, "step": 8526, "task_loss": 0.4338427782058716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37449824810028076, "epoch": 7.21, "learning_rate": 1.3960270498732036e-05, "loss": 0.361, "step": 8527, "task_loss": 0.8757673501968384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20253527164459229, "epoch": 7.21, "learning_rate": 1.3956043956043957e-05, "loss": 0.4218, "step": 8528, "task_loss": 0.16213764250278473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3754189610481262, "epoch": 7.21, "learning_rate": 1.3951817413355875e-05, "loss": 0.3449, "step": 8529, "task_loss": 0.06617217510938644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5472580790519714, "epoch": 7.21, "learning_rate": 1.3947590870667795e-05, "loss": 0.3785, "step": 8530, "task_loss": 0.8132808804512024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2919911742210388, "epoch": 7.21, "learning_rate": 1.3943364327979713e-05, "loss": 0.4958, "step": 8531, "task_loss": 0.37474772334098816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2878856062889099, "epoch": 7.21, "learning_rate": 1.3939137785291631e-05, "loss": 0.4154, "step": 8532, "task_loss": 0.2587900161743164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28955981135368347, "epoch": 7.21, "learning_rate": 1.3934911242603551e-05, "loss": 0.3374, "step": 8533, "task_loss": 0.47811076045036316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21137693524360657, "epoch": 7.21, "learning_rate": 1.3930684699915469e-05, "loss": 0.4438, "step": 8534, "task_loss": 0.2500072121620178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4729272723197937, "epoch": 7.21, "learning_rate": 1.392645815722739e-05, "loss": 0.4598, "step": 8535, "task_loss": 0.6682793498039246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30739715695381165, "epoch": 7.22, "learning_rate": 1.3922231614539308e-05, "loss": 0.3144, "step": 8536, "task_loss": 0.8692510724067688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27924707531929016, "epoch": 7.22, "learning_rate": 1.3918005071851225e-05, "loss": 0.2895, "step": 8537, "task_loss": 0.4635639786720276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30036357045173645, "epoch": 7.22, "learning_rate": 1.3913778529163146e-05, "loss": 0.3438, "step": 8538, "task_loss": 0.5060112476348877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4012424051761627, "epoch": 7.22, "learning_rate": 1.3909551986475064e-05, "loss": 0.4482, "step": 8539, "task_loss": 0.772833526134491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4473796486854553, "epoch": 7.22, "learning_rate": 1.3905325443786982e-05, "loss": 0.394, "step": 8540, "task_loss": 0.34098735451698303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40113675594329834, "epoch": 7.22, "learning_rate": 1.3901098901098902e-05, "loss": 0.5206, "step": 8541, "task_loss": 0.21092194318771362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3019765615463257, "epoch": 7.22, "learning_rate": 1.389687235841082e-05, "loss": 0.3286, "step": 8542, "task_loss": 0.235370472073555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47698116302490234, "epoch": 7.22, "learning_rate": 1.3892645815722741e-05, "loss": 0.4689, "step": 8543, "task_loss": 0.3217032551765442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3377663493156433, "epoch": 7.22, "learning_rate": 1.3888419273034658e-05, "loss": 0.3652, "step": 8544, "task_loss": 0.41617658734321594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29802316427230835, "epoch": 7.22, "learning_rate": 1.3884192730346576e-05, "loss": 0.3668, "step": 8545, "task_loss": 0.5862835049629211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5737792253494263, "epoch": 7.22, "learning_rate": 1.3879966187658497e-05, "loss": 0.5202, "step": 8546, "task_loss": 0.6241216659545898 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3956802487373352, "epoch": 7.22, "learning_rate": 1.3875739644970415e-05, "loss": 0.4122, "step": 8547, "task_loss": 1.2210131883621216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4121933579444885, "epoch": 7.23, "learning_rate": 1.3871513102282332e-05, "loss": 0.5259, "step": 8548, "task_loss": 0.5457651019096375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4244515895843506, "epoch": 7.23, "learning_rate": 1.3867286559594253e-05, "loss": 0.3731, "step": 8549, "task_loss": 0.20137014985084534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5554964542388916, "epoch": 7.23, "learning_rate": 1.3863060016906171e-05, "loss": 0.461, "step": 8550, "task_loss": 0.3971710205078125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40970927476882935, "epoch": 7.23, "learning_rate": 1.3858833474218091e-05, "loss": 0.3715, "step": 8551, "task_loss": 0.9400939345359802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4315983057022095, "epoch": 7.23, "learning_rate": 1.3854606931530009e-05, "loss": 0.3273, "step": 8552, "task_loss": 0.7486255168914795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28100165724754333, "epoch": 7.23, "learning_rate": 1.3850380388841927e-05, "loss": 0.3264, "step": 8553, "task_loss": 0.43455350399017334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17266938090324402, "epoch": 7.23, "learning_rate": 1.3846153846153847e-05, "loss": 0.3853, "step": 8554, "task_loss": 0.24986565113067627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.479789674282074, "epoch": 7.23, "learning_rate": 1.3841927303465765e-05, "loss": 0.5231, "step": 8555, "task_loss": 0.224117711186409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34944698214530945, "epoch": 7.23, "learning_rate": 1.3837700760777683e-05, "loss": 0.3907, "step": 8556, "task_loss": 0.9573411345481873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5413358211517334, "epoch": 7.23, "learning_rate": 1.3833474218089604e-05, "loss": 0.5717, "step": 8557, "task_loss": 0.7521502375602722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5187146663665771, "epoch": 7.23, "learning_rate": 1.3829247675401522e-05, "loss": 0.3791, "step": 8558, "task_loss": 0.7211936712265015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3217013478279114, "epoch": 7.23, "learning_rate": 1.3825021132713442e-05, "loss": 0.3773, "step": 8559, "task_loss": 0.184209942817688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3858310580253601, "epoch": 7.24, "learning_rate": 1.382079459002536e-05, "loss": 0.3311, "step": 8560, "task_loss": 0.5542607307434082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30691802501678467, "epoch": 7.24, "learning_rate": 1.3816568047337278e-05, "loss": 0.3789, "step": 8561, "task_loss": 0.5664929747581482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36254364252090454, "epoch": 7.24, "learning_rate": 1.3812341504649198e-05, "loss": 0.418, "step": 8562, "task_loss": 0.4081762135028839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24940970540046692, "epoch": 7.24, "learning_rate": 1.3808114961961116e-05, "loss": 0.3356, "step": 8563, "task_loss": 0.05239592120051384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5555640459060669, "epoch": 7.24, "learning_rate": 1.3803888419273037e-05, "loss": 0.5096, "step": 8564, "task_loss": 1.212148666381836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4263947606086731, "epoch": 7.24, "learning_rate": 1.3799661876584954e-05, "loss": 0.4928, "step": 8565, "task_loss": 1.0455942153930664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3000696301460266, "epoch": 7.24, "learning_rate": 1.3795435333896872e-05, "loss": 0.5257, "step": 8566, "task_loss": 0.4034479856491089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30817484855651855, "epoch": 7.24, "learning_rate": 1.3791208791208793e-05, "loss": 0.3162, "step": 8567, "task_loss": 0.34903639554977417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29415959119796753, "epoch": 7.24, "learning_rate": 1.3786982248520711e-05, "loss": 0.3466, "step": 8568, "task_loss": 0.6986730098724365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34249183535575867, "epoch": 7.24, "learning_rate": 1.3782755705832628e-05, "loss": 0.3542, "step": 8569, "task_loss": 1.0983505249023438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38892924785614014, "epoch": 7.24, "learning_rate": 1.3778529163144549e-05, "loss": 0.4218, "step": 8570, "task_loss": 0.30498379468917847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4897609353065491, "epoch": 7.24, "learning_rate": 1.3774302620456467e-05, "loss": 0.4079, "step": 8571, "task_loss": 0.8633142709732056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4524924159049988, "epoch": 7.25, "learning_rate": 1.3770076077768387e-05, "loss": 0.3466, "step": 8572, "task_loss": 0.7186328172683716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3173494338989258, "epoch": 7.25, "learning_rate": 1.3765849535080305e-05, "loss": 0.4324, "step": 8573, "task_loss": 1.0681122541427612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5366412401199341, "epoch": 7.25, "learning_rate": 1.3761622992392223e-05, "loss": 0.5694, "step": 8574, "task_loss": 0.5369795560836792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2759028375148773, "epoch": 7.25, "learning_rate": 1.3757396449704144e-05, "loss": 0.4325, "step": 8575, "task_loss": 0.08812876790761948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6899816989898682, "epoch": 7.25, "learning_rate": 1.375316990701606e-05, "loss": 0.4896, "step": 8576, "task_loss": 0.4380262792110443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5933284163475037, "epoch": 7.25, "learning_rate": 1.3748943364327979e-05, "loss": 0.3765, "step": 8577, "task_loss": 0.6137198805809021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4504917562007904, "epoch": 7.25, "learning_rate": 1.37447168216399e-05, "loss": 0.4347, "step": 8578, "task_loss": 1.3623602390289307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2852308750152588, "epoch": 7.25, "learning_rate": 1.3740490278951818e-05, "loss": 0.4276, "step": 8579, "task_loss": 0.21618777513504028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4170371890068054, "epoch": 7.25, "learning_rate": 1.3736263736263738e-05, "loss": 0.5685, "step": 8580, "task_loss": 1.2817444801330566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41002053022384644, "epoch": 7.25, "learning_rate": 1.3732037193575656e-05, "loss": 0.3637, "step": 8581, "task_loss": 0.8383623361587524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3668299615383148, "epoch": 7.25, "learning_rate": 1.3727810650887574e-05, "loss": 0.3804, "step": 8582, "task_loss": 0.6670367121696472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30578210949897766, "epoch": 7.26, "learning_rate": 1.3723584108199494e-05, "loss": 0.3686, "step": 8583, "task_loss": 0.14470511674880981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7104932069778442, "epoch": 7.26, "learning_rate": 1.3719357565511412e-05, "loss": 0.4109, "step": 8584, "task_loss": 1.0525892972946167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2472262680530548, "epoch": 7.26, "learning_rate": 1.371513102282333e-05, "loss": 0.264, "step": 8585, "task_loss": 0.3102058470249176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2992400825023651, "epoch": 7.26, "learning_rate": 1.371090448013525e-05, "loss": 0.3902, "step": 8586, "task_loss": 1.0000417232513428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.397636741399765, "epoch": 7.26, "learning_rate": 1.3706677937447168e-05, "loss": 0.3431, "step": 8587, "task_loss": 0.38747352361679077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3389577269554138, "epoch": 7.26, "learning_rate": 1.3702451394759089e-05, "loss": 0.3182, "step": 8588, "task_loss": 0.7572832703590393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3555694818496704, "epoch": 7.26, "learning_rate": 1.3698224852071007e-05, "loss": 0.2956, "step": 8589, "task_loss": 0.9081255197525024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5013615489006042, "epoch": 7.26, "learning_rate": 1.3693998309382925e-05, "loss": 0.4981, "step": 8590, "task_loss": 1.1445088386535645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25971195101737976, "epoch": 7.26, "learning_rate": 1.3689771766694845e-05, "loss": 0.3105, "step": 8591, "task_loss": 0.6002896428108215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43126654624938965, "epoch": 7.26, "learning_rate": 1.3685545224006763e-05, "loss": 0.4341, "step": 8592, "task_loss": 0.22942760586738586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3452215790748596, "epoch": 7.26, "learning_rate": 1.3681318681318683e-05, "loss": 0.3979, "step": 8593, "task_loss": 1.3989520072937012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2451454997062683, "epoch": 7.26, "learning_rate": 1.36770921386306e-05, "loss": 0.3929, "step": 8594, "task_loss": 0.78183513879776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3263765275478363, "epoch": 7.27, "learning_rate": 1.3672865595942519e-05, "loss": 0.3592, "step": 8595, "task_loss": 0.24866414070129395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42782655358314514, "epoch": 7.27, "learning_rate": 1.366863905325444e-05, "loss": 0.3086, "step": 8596, "task_loss": 0.4088786840438843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35576164722442627, "epoch": 7.27, "learning_rate": 1.3664412510566357e-05, "loss": 0.4195, "step": 8597, "task_loss": 0.6562376022338867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2822190523147583, "epoch": 7.27, "learning_rate": 1.3660185967878275e-05, "loss": 0.4104, "step": 8598, "task_loss": 0.04291326552629471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4295153319835663, "epoch": 7.27, "learning_rate": 1.3655959425190196e-05, "loss": 0.4032, "step": 8599, "task_loss": 0.5472261905670166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3212353587150574, "epoch": 7.27, "learning_rate": 1.3651732882502114e-05, "loss": 0.4338, "step": 8600, "task_loss": 1.1058002710342407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28444987535476685, "epoch": 7.27, "learning_rate": 1.3647506339814034e-05, "loss": 0.3154, "step": 8601, "task_loss": 0.12570720911026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25766852498054504, "epoch": 7.27, "learning_rate": 1.3643279797125952e-05, "loss": 0.2679, "step": 8602, "task_loss": 0.12526074051856995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3514648675918579, "epoch": 7.27, "learning_rate": 1.363905325443787e-05, "loss": 0.3528, "step": 8603, "task_loss": 0.8331689238548279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3361646234989166, "epoch": 7.27, "learning_rate": 1.363482671174979e-05, "loss": 0.3919, "step": 8604, "task_loss": 0.5955148339271545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3545660376548767, "epoch": 7.27, "learning_rate": 1.3630600169061708e-05, "loss": 0.3839, "step": 8605, "task_loss": 1.0678670406341553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5521379709243774, "epoch": 7.27, "learning_rate": 1.3626373626373626e-05, "loss": 0.5002, "step": 8606, "task_loss": 0.6152999401092529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4553637206554413, "epoch": 7.28, "learning_rate": 1.3622147083685547e-05, "loss": 0.4117, "step": 8607, "task_loss": 0.19236235320568085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.540372908115387, "epoch": 7.28, "learning_rate": 1.3617920540997464e-05, "loss": 0.4247, "step": 8608, "task_loss": 1.370190978050232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2881808876991272, "epoch": 7.28, "learning_rate": 1.3613693998309385e-05, "loss": 0.3531, "step": 8609, "task_loss": 0.6731514930725098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25387847423553467, "epoch": 7.28, "learning_rate": 1.3609467455621303e-05, "loss": 0.4682, "step": 8610, "task_loss": 0.9486519694328308 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30359694361686707, "epoch": 7.28, "learning_rate": 1.3605240912933221e-05, "loss": 0.3656, "step": 8611, "task_loss": 0.46465539932250977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16782517731189728, "epoch": 7.28, "learning_rate": 1.360101437024514e-05, "loss": 0.4961, "step": 8612, "task_loss": 0.08161499351263046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7686130404472351, "epoch": 7.28, "learning_rate": 1.3596787827557059e-05, "loss": 0.497, "step": 8613, "task_loss": 0.8932656049728394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27487850189208984, "epoch": 7.28, "learning_rate": 1.3592561284868977e-05, "loss": 0.3704, "step": 8614, "task_loss": 0.8220574259757996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3330289423465729, "epoch": 7.28, "learning_rate": 1.3588334742180897e-05, "loss": 0.5226, "step": 8615, "task_loss": 0.7364484071731567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25871479511260986, "epoch": 7.28, "learning_rate": 1.3584108199492815e-05, "loss": 0.3592, "step": 8616, "task_loss": 0.9599449038505554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33043479919433594, "epoch": 7.28, "learning_rate": 1.3579881656804736e-05, "loss": 0.4356, "step": 8617, "task_loss": 0.6811276078224182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5672525763511658, "epoch": 7.28, "learning_rate": 1.3575655114116652e-05, "loss": 0.4662, "step": 8618, "task_loss": 0.9465789794921875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21204985678195953, "epoch": 7.29, "learning_rate": 1.357142857142857e-05, "loss": 0.4529, "step": 8619, "task_loss": 0.12446191161870956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3419177532196045, "epoch": 7.29, "learning_rate": 1.3567202028740492e-05, "loss": 0.3171, "step": 8620, "task_loss": 0.813675582408905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25606483221054077, "epoch": 7.29, "learning_rate": 1.356297548605241e-05, "loss": 0.3336, "step": 8621, "task_loss": 0.05582047253847122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8029910922050476, "epoch": 7.29, "learning_rate": 1.355874894336433e-05, "loss": 0.5935, "step": 8622, "task_loss": 0.787638783454895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40030235052108765, "epoch": 7.29, "learning_rate": 1.3554522400676248e-05, "loss": 0.4107, "step": 8623, "task_loss": 0.6856584548950195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3166125416755676, "epoch": 7.29, "learning_rate": 1.3550295857988166e-05, "loss": 0.4431, "step": 8624, "task_loss": 0.31369078159332275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.409542441368103, "epoch": 7.29, "learning_rate": 1.3546069315300086e-05, "loss": 0.3557, "step": 8625, "task_loss": 0.38980358839035034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3922196626663208, "epoch": 7.29, "learning_rate": 1.3541842772612004e-05, "loss": 0.3394, "step": 8626, "task_loss": 0.6547351479530334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48632580041885376, "epoch": 7.29, "learning_rate": 1.3537616229923922e-05, "loss": 0.3663, "step": 8627, "task_loss": 0.4329163432121277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1942305862903595, "epoch": 7.29, "learning_rate": 1.3533389687235843e-05, "loss": 0.355, "step": 8628, "task_loss": 0.723336935043335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4070796072483063, "epoch": 7.29, "learning_rate": 1.352916314454776e-05, "loss": 0.4961, "step": 8629, "task_loss": 1.051347017288208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4013592302799225, "epoch": 7.29, "learning_rate": 1.352493660185968e-05, "loss": 0.3931, "step": 8630, "task_loss": 0.47482162714004517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19653481245040894, "epoch": 7.3, "learning_rate": 1.3520710059171599e-05, "loss": 0.2683, "step": 8631, "task_loss": 0.025963615626096725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44278013706207275, "epoch": 7.3, "learning_rate": 1.3516483516483517e-05, "loss": 0.3423, "step": 8632, "task_loss": 0.2865676283836365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4132575988769531, "epoch": 7.3, "learning_rate": 1.3512256973795437e-05, "loss": 0.3011, "step": 8633, "task_loss": 0.505929172039032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3711126446723938, "epoch": 7.3, "learning_rate": 1.3508030431107355e-05, "loss": 0.474, "step": 8634, "task_loss": 0.3926868140697479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2883041501045227, "epoch": 7.3, "learning_rate": 1.3503803888419273e-05, "loss": 0.4002, "step": 8635, "task_loss": 0.9216748476028442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3653143048286438, "epoch": 7.3, "learning_rate": 1.3499577345731192e-05, "loss": 0.3633, "step": 8636, "task_loss": 0.4034796357154846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3774307370185852, "epoch": 7.3, "learning_rate": 1.349535080304311e-05, "loss": 0.401, "step": 8637, "task_loss": 0.7192955613136292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2929084599018097, "epoch": 7.3, "learning_rate": 1.3491124260355032e-05, "loss": 0.2942, "step": 8638, "task_loss": 0.26929783821105957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3468337655067444, "epoch": 7.3, "learning_rate": 1.348689771766695e-05, "loss": 0.3722, "step": 8639, "task_loss": 0.2371395081281662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4155738949775696, "epoch": 7.3, "learning_rate": 1.3482671174978866e-05, "loss": 0.4063, "step": 8640, "task_loss": 0.6588971614837646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20427995920181274, "epoch": 7.3, "learning_rate": 1.3478444632290788e-05, "loss": 0.3478, "step": 8641, "task_loss": 0.8175039291381836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31295791268348694, "epoch": 7.3, "learning_rate": 1.3474218089602706e-05, "loss": 0.2734, "step": 8642, "task_loss": 0.5994687676429749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6649050712585449, "epoch": 7.31, "learning_rate": 1.3469991546914624e-05, "loss": 0.4422, "step": 8643, "task_loss": 0.4326794147491455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28210699558258057, "epoch": 7.31, "learning_rate": 1.3465765004226544e-05, "loss": 0.3455, "step": 8644, "task_loss": 0.46208304166793823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27043116092681885, "epoch": 7.31, "learning_rate": 1.3461538461538462e-05, "loss": 0.3437, "step": 8645, "task_loss": 0.0951174944639206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.552838146686554, "epoch": 7.31, "learning_rate": 1.3457311918850381e-05, "loss": 0.4889, "step": 8646, "task_loss": 0.821312427520752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.249177485704422, "epoch": 7.31, "learning_rate": 1.34530853761623e-05, "loss": 0.3387, "step": 8647, "task_loss": 0.364907443523407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28236812353134155, "epoch": 7.31, "learning_rate": 1.3448858833474217e-05, "loss": 0.311, "step": 8648, "task_loss": 0.14876903593540192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48333948850631714, "epoch": 7.31, "learning_rate": 1.3444632290786139e-05, "loss": 0.4965, "step": 8649, "task_loss": 0.4771033525466919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40405696630477905, "epoch": 7.31, "learning_rate": 1.3440405748098055e-05, "loss": 0.4487, "step": 8650, "task_loss": 1.2152924537658691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34432879090309143, "epoch": 7.31, "learning_rate": 1.3436179205409977e-05, "loss": 0.4606, "step": 8651, "task_loss": 0.45158663392066956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3807457685470581, "epoch": 7.31, "learning_rate": 1.3431952662721895e-05, "loss": 0.4927, "step": 8652, "task_loss": 1.035140037536621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37602412700653076, "epoch": 7.31, "learning_rate": 1.3427726120033813e-05, "loss": 0.3116, "step": 8653, "task_loss": 0.7416728138923645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3265550434589386, "epoch": 7.32, "learning_rate": 1.3423499577345733e-05, "loss": 0.3903, "step": 8654, "task_loss": 0.9113268256187439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3064642548561096, "epoch": 7.32, "learning_rate": 1.341927303465765e-05, "loss": 0.3587, "step": 8655, "task_loss": 0.5006517171859741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4127204418182373, "epoch": 7.32, "learning_rate": 1.3415046491969569e-05, "loss": 0.3644, "step": 8656, "task_loss": 1.1537973880767822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41386687755584717, "epoch": 7.32, "learning_rate": 1.3410819949281488e-05, "loss": 0.3883, "step": 8657, "task_loss": 0.1646079421043396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20974020659923553, "epoch": 7.32, "learning_rate": 1.3406593406593406e-05, "loss": 0.3376, "step": 8658, "task_loss": 1.1433213949203491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4548261761665344, "epoch": 7.32, "learning_rate": 1.3402366863905328e-05, "loss": 0.3723, "step": 8659, "task_loss": 0.23796841502189636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5329951047897339, "epoch": 7.32, "learning_rate": 1.3398140321217246e-05, "loss": 0.3801, "step": 8660, "task_loss": 1.2379002571105957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4140164852142334, "epoch": 7.32, "learning_rate": 1.3393913778529162e-05, "loss": 0.4092, "step": 8661, "task_loss": 0.6631278991699219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3560425639152527, "epoch": 7.32, "learning_rate": 1.3389687235841084e-05, "loss": 0.3162, "step": 8662, "task_loss": 0.15443722903728485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29310721158981323, "epoch": 7.32, "learning_rate": 1.3385460693153002e-05, "loss": 0.336, "step": 8663, "task_loss": 0.3120836317539215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4309068024158478, "epoch": 7.32, "learning_rate": 1.338123415046492e-05, "loss": 0.5265, "step": 8664, "task_loss": 0.6378076076507568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3921373784542084, "epoch": 7.32, "learning_rate": 1.337700760777684e-05, "loss": 0.4362, "step": 8665, "task_loss": 0.9705500602722168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4018746614456177, "epoch": 7.33, "learning_rate": 1.3372781065088758e-05, "loss": 0.416, "step": 8666, "task_loss": 0.7478629946708679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42230039834976196, "epoch": 7.33, "learning_rate": 1.3368554522400677e-05, "loss": 0.4388, "step": 8667, "task_loss": 0.18032880127429962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4468831717967987, "epoch": 7.33, "learning_rate": 1.3364327979712595e-05, "loss": 0.4433, "step": 8668, "task_loss": 0.9867525696754456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2717626690864563, "epoch": 7.33, "learning_rate": 1.3360101437024513e-05, "loss": 0.4367, "step": 8669, "task_loss": 1.1541475057601929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40353021025657654, "epoch": 7.33, "learning_rate": 1.3355874894336435e-05, "loss": 0.3665, "step": 8670, "task_loss": 0.7211099863052368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2924869954586029, "epoch": 7.33, "learning_rate": 1.3351648351648353e-05, "loss": 0.4751, "step": 8671, "task_loss": 0.8653159141540527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2930752635002136, "epoch": 7.33, "learning_rate": 1.334742180896027e-05, "loss": 0.3487, "step": 8672, "task_loss": 0.8559185862541199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4897002577781677, "epoch": 7.33, "learning_rate": 1.334319526627219e-05, "loss": 0.4671, "step": 8673, "task_loss": 0.6756983995437622 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.211037740111351, "epoch": 7.33, "learning_rate": 1.3338968723584109e-05, "loss": 0.3556, "step": 8674, "task_loss": 0.6291630864143372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3360767960548401, "epoch": 7.33, "learning_rate": 1.3334742180896028e-05, "loss": 0.3478, "step": 8675, "task_loss": 0.669925332069397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4772075116634369, "epoch": 7.33, "learning_rate": 1.3330515638207946e-05, "loss": 0.3986, "step": 8676, "task_loss": 0.6321662664413452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24405543506145477, "epoch": 7.33, "learning_rate": 1.3326289095519864e-05, "loss": 0.3657, "step": 8677, "task_loss": 0.1629209816455841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4331558644771576, "epoch": 7.34, "learning_rate": 1.3322062552831784e-05, "loss": 0.3649, "step": 8678, "task_loss": 1.1110867261886597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3111623525619507, "epoch": 7.34, "learning_rate": 1.3317836010143702e-05, "loss": 0.4241, "step": 8679, "task_loss": 0.8777350187301636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.500487208366394, "epoch": 7.34, "learning_rate": 1.3313609467455624e-05, "loss": 0.4438, "step": 8680, "task_loss": 0.9502843022346497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28063181042671204, "epoch": 7.34, "learning_rate": 1.3309382924767542e-05, "loss": 0.4563, "step": 8681, "task_loss": 0.5501363277435303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43725597858428955, "epoch": 7.34, "learning_rate": 1.3305156382079458e-05, "loss": 0.3996, "step": 8682, "task_loss": 1.4438990354537964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3341028392314911, "epoch": 7.34, "learning_rate": 1.330092983939138e-05, "loss": 0.2957, "step": 8683, "task_loss": 0.5016899704933167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45649781823158264, "epoch": 7.34, "learning_rate": 1.3296703296703298e-05, "loss": 0.2901, "step": 8684, "task_loss": 0.8486303091049194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45747965574264526, "epoch": 7.34, "learning_rate": 1.3292476754015216e-05, "loss": 0.4244, "step": 8685, "task_loss": 0.47018468379974365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45868751406669617, "epoch": 7.34, "learning_rate": 1.3288250211327135e-05, "loss": 0.4547, "step": 8686, "task_loss": 0.7005316615104675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3640376925468445, "epoch": 7.34, "learning_rate": 1.3284023668639053e-05, "loss": 0.4192, "step": 8687, "task_loss": 0.13469792902469635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18828928470611572, "epoch": 7.34, "learning_rate": 1.3279797125950975e-05, "loss": 0.3303, "step": 8688, "task_loss": 0.5118227005004883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2525501847267151, "epoch": 7.34, "learning_rate": 1.3275570583262891e-05, "loss": 0.2949, "step": 8689, "task_loss": 0.3184832036495209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4442913234233856, "epoch": 7.35, "learning_rate": 1.327134404057481e-05, "loss": 0.5297, "step": 8690, "task_loss": 0.5667042136192322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26817652583122253, "epoch": 7.35, "learning_rate": 1.326711749788673e-05, "loss": 0.4387, "step": 8691, "task_loss": 0.27988946437835693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.213149756193161, "epoch": 7.35, "learning_rate": 1.3262890955198649e-05, "loss": 0.3333, "step": 8692, "task_loss": 0.051734503358602524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.410963237285614, "epoch": 7.35, "learning_rate": 1.3258664412510565e-05, "loss": 0.3889, "step": 8693, "task_loss": 0.6384717226028442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3206910192966461, "epoch": 7.35, "learning_rate": 1.3254437869822487e-05, "loss": 0.3926, "step": 8694, "task_loss": 0.5632293224334717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49062347412109375, "epoch": 7.35, "learning_rate": 1.3250211327134405e-05, "loss": 0.5304, "step": 8695, "task_loss": 0.5054396986961365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5377039909362793, "epoch": 7.35, "learning_rate": 1.3245984784446324e-05, "loss": 0.4534, "step": 8696, "task_loss": 0.4006824493408203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5226354598999023, "epoch": 7.35, "learning_rate": 1.3241758241758242e-05, "loss": 0.51, "step": 8697, "task_loss": 0.7198591232299805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3524794280529022, "epoch": 7.35, "learning_rate": 1.323753169907016e-05, "loss": 0.4425, "step": 8698, "task_loss": 0.7367194294929504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43069273233413696, "epoch": 7.35, "learning_rate": 1.323330515638208e-05, "loss": 0.3887, "step": 8699, "task_loss": 0.6201344728469849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38378632068634033, "epoch": 7.35, "learning_rate": 1.3229078613693998e-05, "loss": 0.4201, "step": 8700, "task_loss": 0.7699970006942749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40096601843833923, "epoch": 7.35, "learning_rate": 1.3224852071005916e-05, "loss": 0.3627, "step": 8701, "task_loss": 1.081023931503296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27769097685813904, "epoch": 7.36, "learning_rate": 1.3220625528317838e-05, "loss": 0.4298, "step": 8702, "task_loss": 0.21367943286895752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2748101055622101, "epoch": 7.36, "learning_rate": 1.3216398985629756e-05, "loss": 0.3124, "step": 8703, "task_loss": 0.17865511775016785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4741690754890442, "epoch": 7.36, "learning_rate": 1.3212172442941675e-05, "loss": 0.3848, "step": 8704, "task_loss": 0.7435306310653687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2573087215423584, "epoch": 7.36, "learning_rate": 1.3207945900253593e-05, "loss": 0.3032, "step": 8705, "task_loss": 1.0739707946777344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45296263694763184, "epoch": 7.36, "learning_rate": 1.3203719357565512e-05, "loss": 0.5639, "step": 8706, "task_loss": 0.6117860078811646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4623570442199707, "epoch": 7.36, "learning_rate": 1.3199492814877431e-05, "loss": 0.3687, "step": 8707, "task_loss": 0.7984516024589539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5325337648391724, "epoch": 7.36, "learning_rate": 1.319526627218935e-05, "loss": 0.3462, "step": 8708, "task_loss": 1.0781035423278809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3728008270263672, "epoch": 7.36, "learning_rate": 1.3191039729501267e-05, "loss": 0.3046, "step": 8709, "task_loss": 0.40966176986694336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2892404794692993, "epoch": 7.36, "learning_rate": 1.3186813186813187e-05, "loss": 0.3457, "step": 8710, "task_loss": 0.7240236401557922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2686305046081543, "epoch": 7.36, "learning_rate": 1.3182586644125105e-05, "loss": 0.4605, "step": 8711, "task_loss": 0.4243745505809784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5931345224380493, "epoch": 7.36, "learning_rate": 1.3178360101437027e-05, "loss": 0.4884, "step": 8712, "task_loss": 0.1295870840549469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5213711857795715, "epoch": 7.36, "learning_rate": 1.3174133558748945e-05, "loss": 0.4058, "step": 8713, "task_loss": 0.24551443755626678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5323390364646912, "epoch": 7.37, "learning_rate": 1.3169907016060861e-05, "loss": 0.3589, "step": 8714, "task_loss": 0.62319415807724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23142176866531372, "epoch": 7.37, "learning_rate": 1.3165680473372782e-05, "loss": 0.497, "step": 8715, "task_loss": 0.09799226373434067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4250490069389343, "epoch": 7.37, "learning_rate": 1.31614539306847e-05, "loss": 0.3637, "step": 8716, "task_loss": 0.578825056552887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3273549973964691, "epoch": 7.37, "learning_rate": 1.315722738799662e-05, "loss": 0.4016, "step": 8717, "task_loss": 0.21588104963302612 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5098577737808228, "epoch": 7.37, "learning_rate": 1.3153000845308538e-05, "loss": 0.5196, "step": 8718, "task_loss": 0.8958927989006042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32574883103370667, "epoch": 7.37, "learning_rate": 1.3148774302620456e-05, "loss": 0.3875, "step": 8719, "task_loss": 0.4246179759502411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3771357536315918, "epoch": 7.37, "learning_rate": 1.3144547759932378e-05, "loss": 0.3966, "step": 8720, "task_loss": 0.6820191144943237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33946099877357483, "epoch": 7.37, "learning_rate": 1.3140321217244294e-05, "loss": 0.4666, "step": 8721, "task_loss": 0.06475155055522919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3084554374217987, "epoch": 7.37, "learning_rate": 1.3136094674556212e-05, "loss": 0.4377, "step": 8722, "task_loss": 0.7589787244796753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3583388328552246, "epoch": 7.37, "learning_rate": 1.3131868131868134e-05, "loss": 0.3033, "step": 8723, "task_loss": 0.39629092812538147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37276482582092285, "epoch": 7.37, "learning_rate": 1.3127641589180052e-05, "loss": 0.393, "step": 8724, "task_loss": 0.26068851351737976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7178219556808472, "epoch": 7.38, "learning_rate": 1.3123415046491971e-05, "loss": 0.4809, "step": 8725, "task_loss": 0.8537161946296692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5248575210571289, "epoch": 7.38, "learning_rate": 1.311918850380389e-05, "loss": 0.4028, "step": 8726, "task_loss": 1.1732163429260254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41542187333106995, "epoch": 7.38, "learning_rate": 1.3114961961115807e-05, "loss": 0.4006, "step": 8727, "task_loss": 0.33321696519851685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35913047194480896, "epoch": 7.38, "learning_rate": 1.3110735418427727e-05, "loss": 0.3435, "step": 8728, "task_loss": 0.7797046899795532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.265575647354126, "epoch": 7.38, "learning_rate": 1.3106508875739645e-05, "loss": 0.3489, "step": 8729, "task_loss": 0.42789167165756226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7339180707931519, "epoch": 7.38, "learning_rate": 1.3102282333051563e-05, "loss": 0.5076, "step": 8730, "task_loss": 0.5267307758331299 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2875024080276489, "epoch": 7.38, "learning_rate": 1.3098055790363483e-05, "loss": 0.3845, "step": 8731, "task_loss": 0.4913184344768524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47813326120376587, "epoch": 7.38, "learning_rate": 1.3093829247675401e-05, "loss": 0.3513, "step": 8732, "task_loss": 0.49101531505584717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48580288887023926, "epoch": 7.38, "learning_rate": 1.3089602704987322e-05, "loss": 0.533, "step": 8733, "task_loss": 0.3537449538707733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5516266822814941, "epoch": 7.38, "learning_rate": 1.308537616229924e-05, "loss": 0.3627, "step": 8734, "task_loss": 0.7668045163154602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.369252473115921, "epoch": 7.38, "learning_rate": 1.3081149619611159e-05, "loss": 0.4479, "step": 8735, "task_loss": 1.067196249961853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35767072439193726, "epoch": 7.38, "learning_rate": 1.3076923076923078e-05, "loss": 0.4375, "step": 8736, "task_loss": 0.20479261875152588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8271821141242981, "epoch": 7.39, "learning_rate": 1.3072696534234996e-05, "loss": 0.5526, "step": 8737, "task_loss": 0.7176409363746643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25087279081344604, "epoch": 7.39, "learning_rate": 1.3068469991546914e-05, "loss": 0.3388, "step": 8738, "task_loss": 0.287771612405777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37465551495552063, "epoch": 7.39, "learning_rate": 1.3064243448858834e-05, "loss": 0.4188, "step": 8739, "task_loss": 0.20908790826797485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22373594343662262, "epoch": 7.39, "learning_rate": 1.3060016906170752e-05, "loss": 0.3345, "step": 8740, "task_loss": 0.40413469076156616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3162654936313629, "epoch": 7.39, "learning_rate": 1.3055790363482674e-05, "loss": 0.38, "step": 8741, "task_loss": 0.2968984544277191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4183715283870697, "epoch": 7.39, "learning_rate": 1.305156382079459e-05, "loss": 0.3276, "step": 8742, "task_loss": 0.41445067524909973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3937259316444397, "epoch": 7.39, "learning_rate": 1.3047337278106508e-05, "loss": 0.397, "step": 8743, "task_loss": 0.616841197013855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32108646631240845, "epoch": 7.39, "learning_rate": 1.304311073541843e-05, "loss": 0.4189, "step": 8744, "task_loss": 0.1801363229751587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2981376647949219, "epoch": 7.39, "learning_rate": 1.3038884192730347e-05, "loss": 0.5132, "step": 8745, "task_loss": 1.2172858715057373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37557950615882874, "epoch": 7.39, "learning_rate": 1.3034657650042267e-05, "loss": 0.3243, "step": 8746, "task_loss": 0.0847000852227211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4972626268863678, "epoch": 7.39, "learning_rate": 1.3030431107354185e-05, "loss": 0.3945, "step": 8747, "task_loss": 1.1767926216125488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23472389578819275, "epoch": 7.39, "learning_rate": 1.3026204564666103e-05, "loss": 0.3502, "step": 8748, "task_loss": 0.720318615436554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32289016246795654, "epoch": 7.4, "learning_rate": 1.3021978021978023e-05, "loss": 0.3628, "step": 8749, "task_loss": 0.6835820078849792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2789159119129181, "epoch": 7.4, "learning_rate": 1.3017751479289941e-05, "loss": 0.32, "step": 8750, "task_loss": 0.04337075352668762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5624732971191406, "epoch": 7.4, "learning_rate": 1.3013524936601859e-05, "loss": 0.4605, "step": 8751, "task_loss": 0.2518502175807953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37176501750946045, "epoch": 7.4, "learning_rate": 1.300929839391378e-05, "loss": 0.3485, "step": 8752, "task_loss": 0.25776785612106323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.595321774482727, "epoch": 7.4, "learning_rate": 1.3005071851225697e-05, "loss": 0.4822, "step": 8753, "task_loss": 0.5814257860183716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.369944304227829, "epoch": 7.4, "learning_rate": 1.3000845308537618e-05, "loss": 0.3418, "step": 8754, "task_loss": 0.6023995876312256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25749728083610535, "epoch": 7.4, "learning_rate": 1.2996618765849536e-05, "loss": 0.3612, "step": 8755, "task_loss": 0.2631537616252899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3775484561920166, "epoch": 7.4, "learning_rate": 1.2992392223161454e-05, "loss": 0.4429, "step": 8756, "task_loss": 0.16342660784721375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28732916712760925, "epoch": 7.4, "learning_rate": 1.2988165680473374e-05, "loss": 0.3413, "step": 8757, "task_loss": 0.3057233691215515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38720449805259705, "epoch": 7.4, "learning_rate": 1.2983939137785292e-05, "loss": 0.4612, "step": 8758, "task_loss": 0.8248981833457947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8441869020462036, "epoch": 7.4, "learning_rate": 1.297971259509721e-05, "loss": 0.5863, "step": 8759, "task_loss": 2.0191593170166016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5722415447235107, "epoch": 7.4, "learning_rate": 1.297548605240913e-05, "loss": 0.5337, "step": 8760, "task_loss": 1.232725977897644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.56423020362854, "epoch": 7.41, "learning_rate": 1.2971259509721048e-05, "loss": 0.4796, "step": 8761, "task_loss": 0.38701876997947693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5151432156562805, "epoch": 7.41, "learning_rate": 1.296703296703297e-05, "loss": 0.5124, "step": 8762, "task_loss": 0.8187679052352905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3470328450202942, "epoch": 7.41, "learning_rate": 1.2962806424344886e-05, "loss": 0.3222, "step": 8763, "task_loss": 0.542992115020752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3804287910461426, "epoch": 7.41, "learning_rate": 1.2958579881656804e-05, "loss": 0.3741, "step": 8764, "task_loss": 0.40430596470832825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.594031572341919, "epoch": 7.41, "learning_rate": 1.2954353338968725e-05, "loss": 0.5291, "step": 8765, "task_loss": 1.3874036073684692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43327775597572327, "epoch": 7.41, "learning_rate": 1.2950126796280643e-05, "loss": 0.4521, "step": 8766, "task_loss": 0.6687437295913696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5368461012840271, "epoch": 7.41, "learning_rate": 1.2945900253592561e-05, "loss": 0.4776, "step": 8767, "task_loss": 0.9040844440460205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2301350235939026, "epoch": 7.41, "learning_rate": 1.2941673710904481e-05, "loss": 0.5068, "step": 8768, "task_loss": 0.5706980228424072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35703396797180176, "epoch": 7.41, "learning_rate": 1.2937447168216399e-05, "loss": 0.4162, "step": 8769, "task_loss": 0.025224963203072548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34440574049949646, "epoch": 7.41, "learning_rate": 1.2933220625528319e-05, "loss": 0.4272, "step": 8770, "task_loss": 0.5785366296768188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3016105592250824, "epoch": 7.41, "learning_rate": 1.2928994082840237e-05, "loss": 0.3668, "step": 8771, "task_loss": 0.16697748005390167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.546068549156189, "epoch": 7.41, "learning_rate": 1.2924767540152155e-05, "loss": 0.4393, "step": 8772, "task_loss": 0.6965318322181702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49050113558769226, "epoch": 7.42, "learning_rate": 1.2920540997464076e-05, "loss": 0.43, "step": 8773, "task_loss": 0.8907085061073303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2174573838710785, "epoch": 7.42, "learning_rate": 1.2916314454775993e-05, "loss": 0.2989, "step": 8774, "task_loss": 0.2977360785007477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5348559617996216, "epoch": 7.42, "learning_rate": 1.2912087912087914e-05, "loss": 0.4185, "step": 8775, "task_loss": 0.7671751379966736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4547441899776459, "epoch": 7.42, "learning_rate": 1.2907861369399832e-05, "loss": 0.6148, "step": 8776, "task_loss": 1.0726524591445923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2778957188129425, "epoch": 7.42, "learning_rate": 1.290363482671175e-05, "loss": 0.4236, "step": 8777, "task_loss": 0.286037802696228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3593287765979767, "epoch": 7.42, "learning_rate": 1.289940828402367e-05, "loss": 0.4359, "step": 8778, "task_loss": 0.5764700174331665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5996150374412537, "epoch": 7.42, "learning_rate": 1.2895181741335588e-05, "loss": 0.3847, "step": 8779, "task_loss": 0.3056407868862152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37883490324020386, "epoch": 7.42, "learning_rate": 1.2890955198647506e-05, "loss": 0.3708, "step": 8780, "task_loss": 0.48093315958976746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26571112871170044, "epoch": 7.42, "learning_rate": 1.2886728655959426e-05, "loss": 0.4002, "step": 8781, "task_loss": 0.21415020525455475 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5493648052215576, "epoch": 7.42, "learning_rate": 1.2882502113271344e-05, "loss": 0.3672, "step": 8782, "task_loss": 0.5265618562698364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.13616833090782166, "epoch": 7.42, "learning_rate": 1.2878275570583265e-05, "loss": 0.3355, "step": 8783, "task_loss": 0.254390686750412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3377780020236969, "epoch": 7.42, "learning_rate": 1.2874049027895183e-05, "loss": 0.3434, "step": 8784, "task_loss": 0.36878496408462524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3592544198036194, "epoch": 7.43, "learning_rate": 1.28698224852071e-05, "loss": 0.5211, "step": 8785, "task_loss": 0.7577958106994629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36772942543029785, "epoch": 7.43, "learning_rate": 1.2865595942519021e-05, "loss": 0.3931, "step": 8786, "task_loss": 0.6353535652160645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43884795904159546, "epoch": 7.43, "learning_rate": 1.286136939983094e-05, "loss": 0.3548, "step": 8787, "task_loss": 0.7720987796783447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39639517664909363, "epoch": 7.43, "learning_rate": 1.2857142857142857e-05, "loss": 0.3646, "step": 8788, "task_loss": 0.5395711064338684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45970532298088074, "epoch": 7.43, "learning_rate": 1.2852916314454777e-05, "loss": 0.3565, "step": 8789, "task_loss": 0.3985978960990906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2692222595214844, "epoch": 7.43, "learning_rate": 1.2848689771766695e-05, "loss": 0.4606, "step": 8790, "task_loss": 0.7749914526939392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35242217779159546, "epoch": 7.43, "learning_rate": 1.2844463229078615e-05, "loss": 0.3378, "step": 8791, "task_loss": 0.19448921084403992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6296912431716919, "epoch": 7.43, "learning_rate": 1.2840236686390533e-05, "loss": 0.4708, "step": 8792, "task_loss": 0.9199957847595215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4764683246612549, "epoch": 7.43, "learning_rate": 1.283601014370245e-05, "loss": 0.391, "step": 8793, "task_loss": 0.3603833019733429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5886640548706055, "epoch": 7.43, "learning_rate": 1.2831783601014372e-05, "loss": 0.4655, "step": 8794, "task_loss": 0.6643750667572021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4680676758289337, "epoch": 7.43, "learning_rate": 1.2827557058326289e-05, "loss": 0.4574, "step": 8795, "task_loss": 0.18839330971240997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43024617433547974, "epoch": 7.44, "learning_rate": 1.2823330515638207e-05, "loss": 0.4287, "step": 8796, "task_loss": 0.22537106275558472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3115088939666748, "epoch": 7.44, "learning_rate": 1.2819103972950128e-05, "loss": 0.4744, "step": 8797, "task_loss": 0.03869803249835968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22779077291488647, "epoch": 7.44, "learning_rate": 1.2814877430262046e-05, "loss": 0.3737, "step": 8798, "task_loss": 0.024479741230607033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41871362924575806, "epoch": 7.44, "learning_rate": 1.2810650887573966e-05, "loss": 0.4318, "step": 8799, "task_loss": 0.909135103225708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.264763206243515, "epoch": 7.44, "learning_rate": 1.2806424344885884e-05, "loss": 0.4253, "step": 8800, "task_loss": 0.534777820110321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43819350004196167, "epoch": 7.44, "learning_rate": 1.2802197802197802e-05, "loss": 0.4825, "step": 8801, "task_loss": 0.6781547665596008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3064551055431366, "epoch": 7.44, "learning_rate": 1.2797971259509722e-05, "loss": 0.5006, "step": 8802, "task_loss": 0.8987891674041748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49585264921188354, "epoch": 7.44, "learning_rate": 1.279374471682164e-05, "loss": 0.4279, "step": 8803, "task_loss": 0.702390730381012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3444201648235321, "epoch": 7.44, "learning_rate": 1.2789518174133561e-05, "loss": 0.454, "step": 8804, "task_loss": 0.5378071665763855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18658792972564697, "epoch": 7.44, "learning_rate": 1.278529163144548e-05, "loss": 0.2447, "step": 8805, "task_loss": 0.20429472625255585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3701540529727936, "epoch": 7.44, "learning_rate": 1.2781065088757396e-05, "loss": 0.4203, "step": 8806, "task_loss": 0.181137815117836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6214600801467896, "epoch": 7.44, "learning_rate": 1.2776838546069317e-05, "loss": 0.4063, "step": 8807, "task_loss": 0.6276839375495911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3892475366592407, "epoch": 7.45, "learning_rate": 1.2772612003381235e-05, "loss": 0.4763, "step": 8808, "task_loss": 0.9633103013038635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5496237277984619, "epoch": 7.45, "learning_rate": 1.2768385460693153e-05, "loss": 0.4863, "step": 8809, "task_loss": 0.7736993432044983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42636626958847046, "epoch": 7.45, "learning_rate": 1.2764158918005073e-05, "loss": 0.3829, "step": 8810, "task_loss": 0.12462669610977173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20963656902313232, "epoch": 7.45, "learning_rate": 1.2759932375316991e-05, "loss": 0.3667, "step": 8811, "task_loss": 0.22267165780067444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33875682950019836, "epoch": 7.45, "learning_rate": 1.275570583262891e-05, "loss": 0.3839, "step": 8812, "task_loss": 0.4067496657371521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42679429054260254, "epoch": 7.45, "learning_rate": 1.2751479289940829e-05, "loss": 0.4585, "step": 8813, "task_loss": 0.7633543014526367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24640582501888275, "epoch": 7.45, "learning_rate": 1.2747252747252747e-05, "loss": 0.3258, "step": 8814, "task_loss": 0.1258978396654129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14239676296710968, "epoch": 7.45, "learning_rate": 1.2743026204564668e-05, "loss": 0.3289, "step": 8815, "task_loss": 0.489003449678421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2638423442840576, "epoch": 7.45, "learning_rate": 1.2738799661876586e-05, "loss": 0.471, "step": 8816, "task_loss": 0.52748703956604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39062976837158203, "epoch": 7.45, "learning_rate": 1.2734573119188503e-05, "loss": 0.3689, "step": 8817, "task_loss": 0.21361885964870453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6631650924682617, "epoch": 7.45, "learning_rate": 1.2730346576500424e-05, "loss": 0.446, "step": 8818, "task_loss": 1.7960970401763916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6322839856147766, "epoch": 7.45, "learning_rate": 1.2726120033812342e-05, "loss": 0.5278, "step": 8819, "task_loss": 0.6124684810638428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33605310320854187, "epoch": 7.46, "learning_rate": 1.2721893491124262e-05, "loss": 0.3552, "step": 8820, "task_loss": 0.2629280090332031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30692028999328613, "epoch": 7.46, "learning_rate": 1.271766694843618e-05, "loss": 0.4591, "step": 8821, "task_loss": 0.17651055753231049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42437222599983215, "epoch": 7.46, "learning_rate": 1.2713440405748098e-05, "loss": 0.399, "step": 8822, "task_loss": 0.9975764155387878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4372026026248932, "epoch": 7.46, "learning_rate": 1.2709213863060018e-05, "loss": 0.3536, "step": 8823, "task_loss": 0.36033856868743896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3313635587692261, "epoch": 7.46, "learning_rate": 1.2704987320371936e-05, "loss": 0.4431, "step": 8824, "task_loss": 0.44853127002716064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20918361842632294, "epoch": 7.46, "learning_rate": 1.2700760777683854e-05, "loss": 0.3561, "step": 8825, "task_loss": 0.2294342964887619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4094730019569397, "epoch": 7.46, "learning_rate": 1.2696534234995775e-05, "loss": 0.3733, "step": 8826, "task_loss": 0.6787793040275574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3235383927822113, "epoch": 7.46, "learning_rate": 1.2692307692307691e-05, "loss": 0.5037, "step": 8827, "task_loss": 0.9295744299888611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6724331378936768, "epoch": 7.46, "learning_rate": 1.2688081149619613e-05, "loss": 0.448, "step": 8828, "task_loss": 0.9057302474975586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4415440261363983, "epoch": 7.46, "learning_rate": 1.2683854606931531e-05, "loss": 0.4352, "step": 8829, "task_loss": 0.4219878315925598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34139037132263184, "epoch": 7.46, "learning_rate": 1.2679628064243449e-05, "loss": 0.3915, "step": 8830, "task_loss": 1.065568208694458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4358985424041748, "epoch": 7.46, "learning_rate": 1.2675401521555369e-05, "loss": 0.442, "step": 8831, "task_loss": 0.8653429746627808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1320231854915619, "epoch": 7.47, "learning_rate": 1.2671174978867287e-05, "loss": 0.3247, "step": 8832, "task_loss": 0.27086687088012695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4135437607765198, "epoch": 7.47, "learning_rate": 1.2666948436179206e-05, "loss": 0.3872, "step": 8833, "task_loss": 0.8181164264678955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3609418570995331, "epoch": 7.47, "learning_rate": 1.2662721893491125e-05, "loss": 0.404, "step": 8834, "task_loss": 0.21947786211967468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2378140240907669, "epoch": 7.47, "learning_rate": 1.2658495350803043e-05, "loss": 0.4053, "step": 8835, "task_loss": 0.5357357859611511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2541757822036743, "epoch": 7.47, "learning_rate": 1.2654268808114964e-05, "loss": 0.3689, "step": 8836, "task_loss": 0.9392150640487671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6929994225502014, "epoch": 7.47, "learning_rate": 1.2650042265426882e-05, "loss": 0.4424, "step": 8837, "task_loss": 0.3312847912311554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46591809391975403, "epoch": 7.47, "learning_rate": 1.2645815722738798e-05, "loss": 0.3522, "step": 8838, "task_loss": 0.20715416967868805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2465772032737732, "epoch": 7.47, "learning_rate": 1.264158918005072e-05, "loss": 0.3597, "step": 8839, "task_loss": 0.3812654912471771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4125104546546936, "epoch": 7.47, "learning_rate": 1.2637362637362638e-05, "loss": 0.3281, "step": 8840, "task_loss": 0.11148248612880707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35475894808769226, "epoch": 7.47, "learning_rate": 1.2633136094674558e-05, "loss": 0.3713, "step": 8841, "task_loss": 1.0705935955047607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.612317681312561, "epoch": 7.47, "learning_rate": 1.2628909551986476e-05, "loss": 0.4326, "step": 8842, "task_loss": 1.7060391902923584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31393200159072876, "epoch": 7.47, "learning_rate": 1.2624683009298394e-05, "loss": 0.4207, "step": 8843, "task_loss": 0.47597038745880127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29434478282928467, "epoch": 7.48, "learning_rate": 1.2620456466610313e-05, "loss": 0.4784, "step": 8844, "task_loss": 0.6927053332328796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2564399838447571, "epoch": 7.48, "learning_rate": 1.2616229923922232e-05, "loss": 0.503, "step": 8845, "task_loss": 0.15902084112167358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.12941963970661163, "epoch": 7.48, "learning_rate": 1.261200338123415e-05, "loss": 0.3658, "step": 8846, "task_loss": 0.13869743049144745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29704955220222473, "epoch": 7.48, "learning_rate": 1.2607776838546071e-05, "loss": 0.4081, "step": 8847, "task_loss": 0.7093079686164856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23696014285087585, "epoch": 7.48, "learning_rate": 1.2603550295857989e-05, "loss": 0.4511, "step": 8848, "task_loss": 0.36203086376190186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17028087377548218, "epoch": 7.48, "learning_rate": 1.2599323753169909e-05, "loss": 0.3792, "step": 8849, "task_loss": 0.52455073595047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2588842809200287, "epoch": 7.48, "learning_rate": 1.2595097210481827e-05, "loss": 0.3485, "step": 8850, "task_loss": 0.4023149013519287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4107217788696289, "epoch": 7.48, "learning_rate": 1.2590870667793745e-05, "loss": 0.4252, "step": 8851, "task_loss": 0.4958897829055786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4180072546005249, "epoch": 7.48, "learning_rate": 1.2586644125105665e-05, "loss": 0.3085, "step": 8852, "task_loss": 0.48024940490722656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4177292585372925, "epoch": 7.48, "learning_rate": 1.2582417582417583e-05, "loss": 0.4258, "step": 8853, "task_loss": 0.28809022903442383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2382304072380066, "epoch": 7.48, "learning_rate": 1.25781910397295e-05, "loss": 0.4979, "step": 8854, "task_loss": 0.07806060463190079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4962015748023987, "epoch": 7.48, "learning_rate": 1.257396449704142e-05, "loss": 0.4374, "step": 8855, "task_loss": 0.49213072657585144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4624560475349426, "epoch": 7.49, "learning_rate": 1.2569737954353338e-05, "loss": 0.3953, "step": 8856, "task_loss": 0.775689423084259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34697291254997253, "epoch": 7.49, "learning_rate": 1.256551141166526e-05, "loss": 0.3447, "step": 8857, "task_loss": 0.8250847458839417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1905665546655655, "epoch": 7.49, "learning_rate": 1.2561284868977178e-05, "loss": 0.3133, "step": 8858, "task_loss": 0.8301593661308289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2887338697910309, "epoch": 7.49, "learning_rate": 1.2557058326289094e-05, "loss": 0.3094, "step": 8859, "task_loss": 0.3718477487564087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3488732874393463, "epoch": 7.49, "learning_rate": 1.2552831783601016e-05, "loss": 0.4544, "step": 8860, "task_loss": 0.8264468908309937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35679537057876587, "epoch": 7.49, "learning_rate": 1.2548605240912934e-05, "loss": 0.2933, "step": 8861, "task_loss": 0.32985883951187134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6441663503646851, "epoch": 7.49, "learning_rate": 1.2544378698224854e-05, "loss": 0.4791, "step": 8862, "task_loss": 1.0300378799438477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2912566065788269, "epoch": 7.49, "learning_rate": 1.2540152155536772e-05, "loss": 0.3663, "step": 8863, "task_loss": 0.6975594758987427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24026711285114288, "epoch": 7.49, "learning_rate": 1.253592561284869e-05, "loss": 0.2731, "step": 8864, "task_loss": 0.6998701095581055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.319728285074234, "epoch": 7.49, "learning_rate": 1.253169907016061e-05, "loss": 0.3631, "step": 8865, "task_loss": 0.7206800580024719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2280956655740738, "epoch": 7.49, "learning_rate": 1.2527472527472527e-05, "loss": 0.4835, "step": 8866, "task_loss": 1.1404627561569214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2612929344177246, "epoch": 7.5, "learning_rate": 1.2523245984784445e-05, "loss": 0.3278, "step": 8867, "task_loss": 0.13966603577136993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.288677453994751, "epoch": 7.5, "learning_rate": 1.2519019442096367e-05, "loss": 0.4266, "step": 8868, "task_loss": 0.8758043050765991 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22271905839443207, "epoch": 7.5, "learning_rate": 1.2514792899408285e-05, "loss": 0.3702, "step": 8869, "task_loss": 0.10380499064922333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25997495651245117, "epoch": 7.5, "learning_rate": 1.2510566356720205e-05, "loss": 0.4088, "step": 8870, "task_loss": 0.3332921266555786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34686386585235596, "epoch": 7.5, "learning_rate": 1.2506339814032123e-05, "loss": 0.379, "step": 8871, "task_loss": 0.42655396461486816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4964175820350647, "epoch": 7.5, "learning_rate": 1.250211327134404e-05, "loss": 0.3086, "step": 8872, "task_loss": 0.30568987131118774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3937375545501709, "epoch": 7.5, "learning_rate": 1.2497886728655959e-05, "loss": 0.31, "step": 8873, "task_loss": 0.5503883957862854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23620827496051788, "epoch": 7.5, "learning_rate": 1.2493660185967879e-05, "loss": 0.4223, "step": 8874, "task_loss": 0.5142781138420105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2105209231376648, "epoch": 7.5, "learning_rate": 1.2489433643279798e-05, "loss": 0.3584, "step": 8875, "task_loss": 0.06056657060980797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33140671253204346, "epoch": 7.5, "learning_rate": 1.2485207100591716e-05, "loss": 0.378, "step": 8876, "task_loss": 0.28246891498565674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33876216411590576, "epoch": 7.5, "learning_rate": 1.2480980557903634e-05, "loss": 0.4594, "step": 8877, "task_loss": 0.43334996700286865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30420565605163574, "epoch": 7.5, "learning_rate": 1.2476754015215554e-05, "loss": 0.2899, "step": 8878, "task_loss": 0.11611993610858917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40651336312294006, "epoch": 7.51, "learning_rate": 1.2472527472527474e-05, "loss": 0.4605, "step": 8879, "task_loss": 0.0965336337685585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3722517788410187, "epoch": 7.51, "learning_rate": 1.2468300929839392e-05, "loss": 0.3537, "step": 8880, "task_loss": 0.7548536062240601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5767618417739868, "epoch": 7.51, "learning_rate": 1.246407438715131e-05, "loss": 0.3653, "step": 8881, "task_loss": 0.6869463324546814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24362243711948395, "epoch": 7.51, "learning_rate": 1.245984784446323e-05, "loss": 0.5005, "step": 8882, "task_loss": 1.1425288915634155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5760525465011597, "epoch": 7.51, "learning_rate": 1.245562130177515e-05, "loss": 0.4056, "step": 8883, "task_loss": 0.9298889636993408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3821551501750946, "epoch": 7.51, "learning_rate": 1.2451394759087067e-05, "loss": 0.4632, "step": 8884, "task_loss": 1.2720098495483398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24660804867744446, "epoch": 7.51, "learning_rate": 1.2447168216398985e-05, "loss": 0.412, "step": 8885, "task_loss": 0.6145820021629333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18818311393260956, "epoch": 7.51, "learning_rate": 1.2442941673710905e-05, "loss": 0.3328, "step": 8886, "task_loss": 0.4876258969306946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21004462242126465, "epoch": 7.51, "learning_rate": 1.2438715131022823e-05, "loss": 0.3378, "step": 8887, "task_loss": 0.4048168361186981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.434485524892807, "epoch": 7.51, "learning_rate": 1.2434488588334743e-05, "loss": 0.4139, "step": 8888, "task_loss": 1.3127778768539429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16074581444263458, "epoch": 7.51, "learning_rate": 1.2430262045646663e-05, "loss": 0.3653, "step": 8889, "task_loss": 0.9348604679107666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37428492307662964, "epoch": 7.51, "learning_rate": 1.242603550295858e-05, "loss": 0.4064, "step": 8890, "task_loss": 0.23023812472820282 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29853618144989014, "epoch": 7.52, "learning_rate": 1.2421808960270499e-05, "loss": 0.3676, "step": 8891, "task_loss": 0.26354339718818665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23341627418994904, "epoch": 7.52, "learning_rate": 1.2417582417582419e-05, "loss": 0.2787, "step": 8892, "task_loss": 0.0650770515203476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3779405653476715, "epoch": 7.52, "learning_rate": 1.2413355874894338e-05, "loss": 0.3873, "step": 8893, "task_loss": 0.2642820477485657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5107924938201904, "epoch": 7.52, "learning_rate": 1.2409129332206255e-05, "loss": 0.3938, "step": 8894, "task_loss": 0.9316972494125366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2726571261882782, "epoch": 7.52, "learning_rate": 1.2404902789518174e-05, "loss": 0.441, "step": 8895, "task_loss": 0.26469066739082336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24930131435394287, "epoch": 7.52, "learning_rate": 1.2400676246830094e-05, "loss": 0.4049, "step": 8896, "task_loss": 0.5534136295318604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4573225975036621, "epoch": 7.52, "learning_rate": 1.2396449704142012e-05, "loss": 0.3932, "step": 8897, "task_loss": 0.643497884273529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6484670042991638, "epoch": 7.52, "learning_rate": 1.239222316145393e-05, "loss": 0.3766, "step": 8898, "task_loss": 1.8177398443222046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6824172735214233, "epoch": 7.52, "learning_rate": 1.238799661876585e-05, "loss": 0.5384, "step": 8899, "task_loss": 1.3811885118484497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37835413217544556, "epoch": 7.52, "learning_rate": 1.238377007607777e-05, "loss": 0.3007, "step": 8900, "task_loss": 0.9385674595832825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28758400678634644, "epoch": 7.52, "learning_rate": 1.2379543533389688e-05, "loss": 0.3362, "step": 8901, "task_loss": 0.20636217296123505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4175105690956116, "epoch": 7.52, "learning_rate": 1.2375316990701606e-05, "loss": 0.4085, "step": 8902, "task_loss": 0.4218672215938568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31978416442871094, "epoch": 7.53, "learning_rate": 1.2371090448013526e-05, "loss": 0.3666, "step": 8903, "task_loss": 0.33239826560020447 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23870491981506348, "epoch": 7.53, "learning_rate": 1.2366863905325445e-05, "loss": 0.359, "step": 8904, "task_loss": 0.8117021918296814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3809123635292053, "epoch": 7.53, "learning_rate": 1.2362637362637363e-05, "loss": 0.405, "step": 8905, "task_loss": 0.6673400402069092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5134103298187256, "epoch": 7.53, "learning_rate": 1.2358410819949281e-05, "loss": 0.4659, "step": 8906, "task_loss": 0.5493857860565186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36607301235198975, "epoch": 7.53, "learning_rate": 1.2354184277261201e-05, "loss": 0.4395, "step": 8907, "task_loss": 0.7062835693359375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.522894561290741, "epoch": 7.53, "learning_rate": 1.2349957734573119e-05, "loss": 0.4391, "step": 8908, "task_loss": 0.36490681767463684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2178795486688614, "epoch": 7.53, "learning_rate": 1.2345731191885039e-05, "loss": 0.3607, "step": 8909, "task_loss": 0.2941228151321411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38187286257743835, "epoch": 7.53, "learning_rate": 1.2341504649196957e-05, "loss": 0.3477, "step": 8910, "task_loss": 0.275736004114151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5548045635223389, "epoch": 7.53, "learning_rate": 1.2337278106508877e-05, "loss": 0.4364, "step": 8911, "task_loss": 0.3776021897792816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32947251200675964, "epoch": 7.53, "learning_rate": 1.2333051563820795e-05, "loss": 0.5436, "step": 8912, "task_loss": 0.7478124499320984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49149489402770996, "epoch": 7.53, "learning_rate": 1.2328825021132714e-05, "loss": 0.4001, "step": 8913, "task_loss": 0.4371243119239807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2830374836921692, "epoch": 7.53, "learning_rate": 1.2324598478444632e-05, "loss": 0.4831, "step": 8914, "task_loss": 1.4057505130767822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31978940963745117, "epoch": 7.54, "learning_rate": 1.2320371935756552e-05, "loss": 0.4154, "step": 8915, "task_loss": 0.9829380512237549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5351676344871521, "epoch": 7.54, "learning_rate": 1.231614539306847e-05, "loss": 0.3455, "step": 8916, "task_loss": 0.18762677907943726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.474325031042099, "epoch": 7.54, "learning_rate": 1.231191885038039e-05, "loss": 0.4569, "step": 8917, "task_loss": 0.8255453109741211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3540608882904053, "epoch": 7.54, "learning_rate": 1.230769230769231e-05, "loss": 0.4666, "step": 8918, "task_loss": 0.8022740483283997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33126741647720337, "epoch": 7.54, "learning_rate": 1.2303465765004226e-05, "loss": 0.4486, "step": 8919, "task_loss": 1.1576017141342163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6492169499397278, "epoch": 7.54, "learning_rate": 1.2299239222316146e-05, "loss": 0.4494, "step": 8920, "task_loss": 1.002586841583252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5384097695350647, "epoch": 7.54, "learning_rate": 1.2295012679628066e-05, "loss": 0.5098, "step": 8921, "task_loss": 0.6145704388618469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32398903369903564, "epoch": 7.54, "learning_rate": 1.2290786136939984e-05, "loss": 0.3572, "step": 8922, "task_loss": 0.7566264867782593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2882049083709717, "epoch": 7.54, "learning_rate": 1.2286559594251902e-05, "loss": 0.2982, "step": 8923, "task_loss": 0.08466240763664246 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.11329734325408936, "epoch": 7.54, "learning_rate": 1.2282333051563821e-05, "loss": 0.3494, "step": 8924, "task_loss": 0.03128701075911522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35052669048309326, "epoch": 7.54, "learning_rate": 1.2278106508875741e-05, "loss": 0.3503, "step": 8925, "task_loss": 1.2916423082351685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42401400208473206, "epoch": 7.54, "learning_rate": 1.227387996618766e-05, "loss": 0.3693, "step": 8926, "task_loss": 1.1733742952346802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28700000047683716, "epoch": 7.55, "learning_rate": 1.2269653423499577e-05, "loss": 0.4651, "step": 8927, "task_loss": 0.7809203863143921 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3982163667678833, "epoch": 7.55, "learning_rate": 1.2265426880811497e-05, "loss": 0.3856, "step": 8928, "task_loss": 0.2948783338069916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4382515549659729, "epoch": 7.55, "learning_rate": 1.2261200338123415e-05, "loss": 0.3934, "step": 8929, "task_loss": 0.7454134225845337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6083048582077026, "epoch": 7.55, "learning_rate": 1.2256973795435335e-05, "loss": 0.47, "step": 8930, "task_loss": 0.6318076252937317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2604491710662842, "epoch": 7.55, "learning_rate": 1.2252747252747253e-05, "loss": 0.3542, "step": 8931, "task_loss": 0.828550398349762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25900179147720337, "epoch": 7.55, "learning_rate": 1.2248520710059173e-05, "loss": 0.3333, "step": 8932, "task_loss": 0.3528364598751068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43939876556396484, "epoch": 7.55, "learning_rate": 1.224429416737109e-05, "loss": 0.38, "step": 8933, "task_loss": 0.5450295805931091 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4828951954841614, "epoch": 7.55, "learning_rate": 1.224006762468301e-05, "loss": 0.361, "step": 8934, "task_loss": 1.1304030418395996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.623633623123169, "epoch": 7.55, "learning_rate": 1.2235841081994928e-05, "loss": 0.4303, "step": 8935, "task_loss": 0.46177998185157776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17230787873268127, "epoch": 7.55, "learning_rate": 1.2231614539306848e-05, "loss": 0.3842, "step": 8936, "task_loss": 0.331039160490036 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22837987542152405, "epoch": 7.55, "learning_rate": 1.2227387996618766e-05, "loss": 0.3194, "step": 8937, "task_loss": 0.1575840413570404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3448342978954315, "epoch": 7.56, "learning_rate": 1.2223161453930686e-05, "loss": 0.4067, "step": 8938, "task_loss": 0.20840643346309662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36390653252601624, "epoch": 7.56, "learning_rate": 1.2218934911242604e-05, "loss": 0.3667, "step": 8939, "task_loss": 0.2343478500843048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25411197543144226, "epoch": 7.56, "learning_rate": 1.2214708368554522e-05, "loss": 0.3227, "step": 8940, "task_loss": 0.7479848861694336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1864277422428131, "epoch": 7.56, "learning_rate": 1.2210481825866442e-05, "loss": 0.3126, "step": 8941, "task_loss": 0.5073248147964478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4902167320251465, "epoch": 7.56, "learning_rate": 1.2206255283178361e-05, "loss": 0.4038, "step": 8942, "task_loss": 0.6532389521598816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6410514116287231, "epoch": 7.56, "learning_rate": 1.220202874049028e-05, "loss": 0.3922, "step": 8943, "task_loss": 1.1557260751724243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4287768304347992, "epoch": 7.56, "learning_rate": 1.2197802197802198e-05, "loss": 0.4335, "step": 8944, "task_loss": 1.068378210067749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34037670493125916, "epoch": 7.56, "learning_rate": 1.2193575655114117e-05, "loss": 0.3926, "step": 8945, "task_loss": 0.5771277546882629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3525254726409912, "epoch": 7.56, "learning_rate": 1.2189349112426037e-05, "loss": 0.4133, "step": 8946, "task_loss": 1.157086730003357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19959726929664612, "epoch": 7.56, "learning_rate": 1.2185122569737955e-05, "loss": 0.4065, "step": 8947, "task_loss": 0.19284602999687195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3066311478614807, "epoch": 7.56, "learning_rate": 1.2180896027049873e-05, "loss": 0.3785, "step": 8948, "task_loss": 0.8857675194740295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2674952745437622, "epoch": 7.56, "learning_rate": 1.2176669484361793e-05, "loss": 0.4164, "step": 8949, "task_loss": 0.1879102885723114 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47725144028663635, "epoch": 7.57, "learning_rate": 1.2172442941673713e-05, "loss": 0.3862, "step": 8950, "task_loss": 1.2685786485671997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.424798846244812, "epoch": 7.57, "learning_rate": 1.216821639898563e-05, "loss": 0.4041, "step": 8951, "task_loss": 0.7951120138168335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31424546241760254, "epoch": 7.57, "learning_rate": 1.2163989856297549e-05, "loss": 0.3701, "step": 8952, "task_loss": 1.26388680934906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46314823627471924, "epoch": 7.57, "learning_rate": 1.2159763313609468e-05, "loss": 0.5968, "step": 8953, "task_loss": 1.3174591064453125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4308896064758301, "epoch": 7.57, "learning_rate": 1.2155536770921386e-05, "loss": 0.4049, "step": 8954, "task_loss": 1.1437594890594482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3096659779548645, "epoch": 7.57, "learning_rate": 1.2151310228233306e-05, "loss": 0.3068, "step": 8955, "task_loss": 0.15330055356025696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3247291147708893, "epoch": 7.57, "learning_rate": 1.2147083685545224e-05, "loss": 0.2858, "step": 8956, "task_loss": 0.47926098108291626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38016843795776367, "epoch": 7.57, "learning_rate": 1.2142857142857144e-05, "loss": 0.3266, "step": 8957, "task_loss": 0.4028185307979584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48601335287094116, "epoch": 7.57, "learning_rate": 1.2138630600169062e-05, "loss": 0.4343, "step": 8958, "task_loss": 0.1442585289478302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3384753167629242, "epoch": 7.57, "learning_rate": 1.2134404057480982e-05, "loss": 0.3032, "step": 8959, "task_loss": 0.25352779030799866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44721609354019165, "epoch": 7.57, "learning_rate": 1.21301775147929e-05, "loss": 0.427, "step": 8960, "task_loss": 1.0813796520233154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48134347796440125, "epoch": 7.57, "learning_rate": 1.2125950972104818e-05, "loss": 0.3702, "step": 8961, "task_loss": 0.17928604781627655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16162718832492828, "epoch": 7.58, "learning_rate": 1.2121724429416738e-05, "loss": 0.349, "step": 8962, "task_loss": 0.25479400157928467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32777196168899536, "epoch": 7.58, "learning_rate": 1.2117497886728657e-05, "loss": 0.3983, "step": 8963, "task_loss": 0.8173986673355103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5242729783058167, "epoch": 7.58, "learning_rate": 1.2113271344040575e-05, "loss": 0.3049, "step": 8964, "task_loss": 0.6155838966369629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40636640787124634, "epoch": 7.58, "learning_rate": 1.2109044801352493e-05, "loss": 0.3511, "step": 8965, "task_loss": 0.2831018269062042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.327983021736145, "epoch": 7.58, "learning_rate": 1.2104818258664413e-05, "loss": 0.2996, "step": 8966, "task_loss": 0.42353391647338867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3352714776992798, "epoch": 7.58, "learning_rate": 1.2100591715976333e-05, "loss": 0.3305, "step": 8967, "task_loss": 0.6858367323875427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5846202969551086, "epoch": 7.58, "learning_rate": 1.2096365173288251e-05, "loss": 0.6059, "step": 8968, "task_loss": 0.2644737958908081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3736933469772339, "epoch": 7.58, "learning_rate": 1.2092138630600169e-05, "loss": 0.438, "step": 8969, "task_loss": 0.44630762934684753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2074945718050003, "epoch": 7.58, "learning_rate": 1.2087912087912089e-05, "loss": 0.3863, "step": 8970, "task_loss": 0.8092791438102722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.319236159324646, "epoch": 7.58, "learning_rate": 1.2083685545224008e-05, "loss": 0.3868, "step": 8971, "task_loss": 0.011440849862992764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3765392601490021, "epoch": 7.58, "learning_rate": 1.2079459002535925e-05, "loss": 0.3589, "step": 8972, "task_loss": 0.5851409435272217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3749988079071045, "epoch": 7.58, "learning_rate": 1.2075232459847845e-05, "loss": 0.3049, "step": 8973, "task_loss": 0.6993555426597595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44657981395721436, "epoch": 7.59, "learning_rate": 1.2071005917159764e-05, "loss": 0.4233, "step": 8974, "task_loss": 0.3421289324760437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29481810331344604, "epoch": 7.59, "learning_rate": 1.2066779374471682e-05, "loss": 0.4469, "step": 8975, "task_loss": 0.13945460319519043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5465073585510254, "epoch": 7.59, "learning_rate": 1.20625528317836e-05, "loss": 0.5047, "step": 8976, "task_loss": 1.0065362453460693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27463531494140625, "epoch": 7.59, "learning_rate": 1.205832628909552e-05, "loss": 0.4049, "step": 8977, "task_loss": 0.054988909512758255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33771124482154846, "epoch": 7.59, "learning_rate": 1.205409974640744e-05, "loss": 0.3692, "step": 8978, "task_loss": 0.3773528039455414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4338395595550537, "epoch": 7.59, "learning_rate": 1.2049873203719358e-05, "loss": 0.414, "step": 8979, "task_loss": 0.6713299751281738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2405247986316681, "epoch": 7.59, "learning_rate": 1.2045646661031278e-05, "loss": 0.3377, "step": 8980, "task_loss": 0.35603412985801697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4226876497268677, "epoch": 7.59, "learning_rate": 1.2041420118343196e-05, "loss": 0.3748, "step": 8981, "task_loss": 0.38470765948295593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1752716451883316, "epoch": 7.59, "learning_rate": 1.2037193575655115e-05, "loss": 0.3299, "step": 8982, "task_loss": 1.0044262409210205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8618215918540955, "epoch": 7.59, "learning_rate": 1.2032967032967033e-05, "loss": 0.4886, "step": 8983, "task_loss": 0.511257529258728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5087677836418152, "epoch": 7.59, "learning_rate": 1.2028740490278953e-05, "loss": 0.3417, "step": 8984, "task_loss": 0.618377685546875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24110707640647888, "epoch": 7.59, "learning_rate": 1.2024513947590871e-05, "loss": 0.3462, "step": 8985, "task_loss": 0.3202146291732788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42054715752601624, "epoch": 7.6, "learning_rate": 1.202028740490279e-05, "loss": 0.4972, "step": 8986, "task_loss": 0.19251422584056854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42426180839538574, "epoch": 7.6, "learning_rate": 1.2016060862214709e-05, "loss": 0.4202, "step": 8987, "task_loss": 0.45764419436454773 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37431126832962036, "epoch": 7.6, "learning_rate": 1.2011834319526629e-05, "loss": 0.4048, "step": 8988, "task_loss": 0.4787616729736328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2588847875595093, "epoch": 7.6, "learning_rate": 1.2007607776838547e-05, "loss": 0.4498, "step": 8989, "task_loss": 0.3828330934047699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7134664058685303, "epoch": 7.6, "learning_rate": 1.2003381234150465e-05, "loss": 0.475, "step": 8990, "task_loss": 1.1319533586502075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4578288793563843, "epoch": 7.6, "learning_rate": 1.1999154691462385e-05, "loss": 0.4477, "step": 8991, "task_loss": 0.7399687170982361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2551916241645813, "epoch": 7.6, "learning_rate": 1.1994928148774304e-05, "loss": 0.3173, "step": 8992, "task_loss": 0.17864371836185455 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2895351052284241, "epoch": 7.6, "learning_rate": 1.199070160608622e-05, "loss": 0.3202, "step": 8993, "task_loss": 0.3202516734600067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2535991370677948, "epoch": 7.6, "learning_rate": 1.198647506339814e-05, "loss": 0.3839, "step": 8994, "task_loss": 0.30042195320129395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3672291040420532, "epoch": 7.6, "learning_rate": 1.198224852071006e-05, "loss": 0.448, "step": 8995, "task_loss": 0.3989412486553192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24742627143859863, "epoch": 7.6, "learning_rate": 1.197802197802198e-05, "loss": 0.3432, "step": 8996, "task_loss": 0.3141601085662842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4100652039051056, "epoch": 7.6, "learning_rate": 1.1973795435333896e-05, "loss": 0.4238, "step": 8997, "task_loss": 0.10547729581594467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31135597825050354, "epoch": 7.61, "learning_rate": 1.1969568892645816e-05, "loss": 0.2401, "step": 8998, "task_loss": 0.39433011412620544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17257516086101532, "epoch": 7.61, "learning_rate": 1.1965342349957736e-05, "loss": 0.4469, "step": 8999, "task_loss": 0.3729266822338104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34770286083221436, "epoch": 7.61, "learning_rate": 1.1961115807269654e-05, "loss": 0.4468, "step": 9000, "task_loss": 0.03814266622066498 }, { "epoch": 7.61, "eval_accuracy": 0.9144158415841585, "eval_loss": 0.2698863446712494, "eval_runtime": 226.5568, "eval_samples_per_second": 111.451, "eval_steps_per_second": 0.874, "step": 9000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3650602102279663, "epoch": 7.61, "learning_rate": 1.1956889264581572e-05, "loss": 0.3361, "step": 9001, "task_loss": 0.6610453128814697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22278177738189697, "epoch": 7.61, "learning_rate": 1.1952662721893492e-05, "loss": 0.3935, "step": 9002, "task_loss": 0.08621159195899963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27735745906829834, "epoch": 7.61, "learning_rate": 1.1948436179205411e-05, "loss": 0.3915, "step": 9003, "task_loss": 0.27255043387413025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37566742300987244, "epoch": 7.61, "learning_rate": 1.194420963651733e-05, "loss": 0.3423, "step": 9004, "task_loss": 0.8501888513565063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1851779818534851, "epoch": 7.61, "learning_rate": 1.1939983093829247e-05, "loss": 0.3247, "step": 9005, "task_loss": 0.10457085072994232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24098354578018188, "epoch": 7.61, "learning_rate": 1.1935756551141167e-05, "loss": 0.3528, "step": 9006, "task_loss": 0.280144602060318 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36806613206863403, "epoch": 7.61, "learning_rate": 1.1931530008453085e-05, "loss": 0.4265, "step": 9007, "task_loss": 1.0227279663085938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2922498285770416, "epoch": 7.61, "learning_rate": 1.1927303465765005e-05, "loss": 0.3148, "step": 9008, "task_loss": 0.10302627831697464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4662601947784424, "epoch": 7.61, "learning_rate": 1.1923076923076925e-05, "loss": 0.4752, "step": 9009, "task_loss": 0.7276266813278198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5095608234405518, "epoch": 7.62, "learning_rate": 1.1918850380388843e-05, "loss": 0.4364, "step": 9010, "task_loss": 1.2232692241668701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19969511032104492, "epoch": 7.62, "learning_rate": 1.191462383770076e-05, "loss": 0.3228, "step": 9011, "task_loss": 0.759532630443573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3929698169231415, "epoch": 7.62, "learning_rate": 1.191039729501268e-05, "loss": 0.4696, "step": 9012, "task_loss": 0.8393535017967224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5342813730239868, "epoch": 7.62, "learning_rate": 1.19061707523246e-05, "loss": 0.4763, "step": 9013, "task_loss": 1.1013715267181396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31900882720947266, "epoch": 7.62, "learning_rate": 1.1901944209636518e-05, "loss": 0.3288, "step": 9014, "task_loss": 0.5611660480499268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3892659544944763, "epoch": 7.62, "learning_rate": 1.1897717666948436e-05, "loss": 0.4729, "step": 9015, "task_loss": 0.9186141490936279 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24343809485435486, "epoch": 7.62, "learning_rate": 1.1893491124260356e-05, "loss": 0.3368, "step": 9016, "task_loss": 0.6908840537071228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4670860767364502, "epoch": 7.62, "learning_rate": 1.1889264581572276e-05, "loss": 0.5594, "step": 9017, "task_loss": 0.04985443130135536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22475585341453552, "epoch": 7.62, "learning_rate": 1.1885038038884192e-05, "loss": 0.3454, "step": 9018, "task_loss": 0.09994767606258392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4485097825527191, "epoch": 7.62, "learning_rate": 1.1880811496196112e-05, "loss": 0.3937, "step": 9019, "task_loss": 0.843705415725708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5624816417694092, "epoch": 7.62, "learning_rate": 1.1876584953508032e-05, "loss": 0.3209, "step": 9020, "task_loss": 0.7311621904373169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4427991509437561, "epoch": 7.63, "learning_rate": 1.187235841081995e-05, "loss": 0.3302, "step": 9021, "task_loss": 0.720029890537262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7290709614753723, "epoch": 7.63, "learning_rate": 1.1868131868131868e-05, "loss": 0.4931, "step": 9022, "task_loss": 0.9248529672622681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.593668520450592, "epoch": 7.63, "learning_rate": 1.1863905325443787e-05, "loss": 0.411, "step": 9023, "task_loss": 1.2561993598937988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3528909683227539, "epoch": 7.63, "learning_rate": 1.1859678782755707e-05, "loss": 0.4499, "step": 9024, "task_loss": 0.6737098693847656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5126925110816956, "epoch": 7.63, "learning_rate": 1.1855452240067625e-05, "loss": 0.5475, "step": 9025, "task_loss": 0.6768222451210022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30118900537490845, "epoch": 7.63, "learning_rate": 1.1851225697379543e-05, "loss": 0.3384, "step": 9026, "task_loss": 0.09395896643400192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33822476863861084, "epoch": 7.63, "learning_rate": 1.1846999154691463e-05, "loss": 0.4809, "step": 9027, "task_loss": 0.4942106008529663 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.531981885433197, "epoch": 7.63, "learning_rate": 1.1842772612003383e-05, "loss": 0.5651, "step": 9028, "task_loss": 0.6738797426223755 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26686498522758484, "epoch": 7.63, "learning_rate": 1.18385460693153e-05, "loss": 0.3826, "step": 9029, "task_loss": 0.8414326310157776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5362911224365234, "epoch": 7.63, "learning_rate": 1.1834319526627219e-05, "loss": 0.5541, "step": 9030, "task_loss": 1.2536871433258057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24590009450912476, "epoch": 7.63, "learning_rate": 1.1830092983939139e-05, "loss": 0.4244, "step": 9031, "task_loss": 0.1383514255285263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4412611722946167, "epoch": 7.63, "learning_rate": 1.1825866441251057e-05, "loss": 0.6291, "step": 9032, "task_loss": 0.48700597882270813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4063577950000763, "epoch": 7.64, "learning_rate": 1.1821639898562976e-05, "loss": 0.3208, "step": 9033, "task_loss": 0.7193591594696045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20122285187244415, "epoch": 7.64, "learning_rate": 1.1817413355874894e-05, "loss": 0.4108, "step": 9034, "task_loss": 0.023574326187372208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41745832562446594, "epoch": 7.64, "learning_rate": 1.1813186813186814e-05, "loss": 0.427, "step": 9035, "task_loss": 0.47051501274108887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3117230534553528, "epoch": 7.64, "learning_rate": 1.1808960270498732e-05, "loss": 0.3627, "step": 9036, "task_loss": 0.48757582902908325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2580595314502716, "epoch": 7.64, "learning_rate": 1.1804733727810652e-05, "loss": 0.4546, "step": 9037, "task_loss": 0.7048425078392029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46541884541511536, "epoch": 7.64, "learning_rate": 1.1800507185122572e-05, "loss": 0.4083, "step": 9038, "task_loss": 0.5239202976226807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17622461915016174, "epoch": 7.64, "learning_rate": 1.1796280642434488e-05, "loss": 0.3944, "step": 9039, "task_loss": 0.3850150406360626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2719927728176117, "epoch": 7.64, "learning_rate": 1.1792054099746408e-05, "loss": 0.2775, "step": 9040, "task_loss": 0.5853669047355652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23604165017604828, "epoch": 7.64, "learning_rate": 1.1787827557058327e-05, "loss": 0.3987, "step": 9041, "task_loss": 0.5697468519210815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32700788974761963, "epoch": 7.64, "learning_rate": 1.1783601014370246e-05, "loss": 0.4541, "step": 9042, "task_loss": 0.8926465511322021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37619954347610474, "epoch": 7.64, "learning_rate": 1.1779374471682164e-05, "loss": 0.47, "step": 9043, "task_loss": 1.0556493997573853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2214282602071762, "epoch": 7.64, "learning_rate": 1.1775147928994083e-05, "loss": 0.3603, "step": 9044, "task_loss": 0.8108346462249756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4927966892719269, "epoch": 7.65, "learning_rate": 1.1770921386306003e-05, "loss": 0.4528, "step": 9045, "task_loss": 1.1591700315475464 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4457417130470276, "epoch": 7.65, "learning_rate": 1.1766694843617921e-05, "loss": 0.4117, "step": 9046, "task_loss": 1.0482919216156006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22907936573028564, "epoch": 7.65, "learning_rate": 1.1762468300929839e-05, "loss": 0.4195, "step": 9047, "task_loss": 0.510393500328064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33623653650283813, "epoch": 7.65, "learning_rate": 1.1758241758241759e-05, "loss": 0.3139, "step": 9048, "task_loss": 0.8420602083206177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4092572033405304, "epoch": 7.65, "learning_rate": 1.1754015215553679e-05, "loss": 0.4391, "step": 9049, "task_loss": 0.44004857540130615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2946168780326843, "epoch": 7.65, "learning_rate": 1.1749788672865597e-05, "loss": 0.2832, "step": 9050, "task_loss": 0.16051824390888214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4082638919353485, "epoch": 7.65, "learning_rate": 1.1745562130177515e-05, "loss": 0.3427, "step": 9051, "task_loss": 0.4675252139568329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34707850217819214, "epoch": 7.65, "learning_rate": 1.1741335587489434e-05, "loss": 0.3775, "step": 9052, "task_loss": 0.49460896849632263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4688130021095276, "epoch": 7.65, "learning_rate": 1.1737109044801352e-05, "loss": 0.3211, "step": 9053, "task_loss": 0.39294153451919556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3327360153198242, "epoch": 7.65, "learning_rate": 1.1732882502113272e-05, "loss": 0.4655, "step": 9054, "task_loss": 0.6813886761665344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3562341332435608, "epoch": 7.65, "learning_rate": 1.172865595942519e-05, "loss": 0.3107, "step": 9055, "task_loss": 0.7206686735153198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22212161123752594, "epoch": 7.65, "learning_rate": 1.172442941673711e-05, "loss": 0.3308, "step": 9056, "task_loss": 0.5155127048492432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49100273847579956, "epoch": 7.66, "learning_rate": 1.1720202874049028e-05, "loss": 0.3125, "step": 9057, "task_loss": 0.8621499538421631 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4285551607608795, "epoch": 7.66, "learning_rate": 1.1715976331360948e-05, "loss": 0.3666, "step": 9058, "task_loss": 0.2908785045146942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30743879079818726, "epoch": 7.66, "learning_rate": 1.1711749788672866e-05, "loss": 0.3687, "step": 9059, "task_loss": 0.2561503052711487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40027230978012085, "epoch": 7.66, "learning_rate": 1.1707523245984786e-05, "loss": 0.3293, "step": 9060, "task_loss": 0.19694428145885468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5225579738616943, "epoch": 7.66, "learning_rate": 1.1703296703296704e-05, "loss": 0.4008, "step": 9061, "task_loss": 0.5215948224067688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42380574345588684, "epoch": 7.66, "learning_rate": 1.1699070160608623e-05, "loss": 0.4518, "step": 9062, "task_loss": 0.3542225658893585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.458027720451355, "epoch": 7.66, "learning_rate": 1.1694843617920541e-05, "loss": 0.4771, "step": 9063, "task_loss": 0.98842853307724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8132627606391907, "epoch": 7.66, "learning_rate": 1.169061707523246e-05, "loss": 0.3968, "step": 9064, "task_loss": 1.0664492845535278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3299165368080139, "epoch": 7.66, "learning_rate": 1.168639053254438e-05, "loss": 0.3946, "step": 9065, "task_loss": 0.9507863521575928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21241885423660278, "epoch": 7.66, "learning_rate": 1.1682163989856299e-05, "loss": 0.2898, "step": 9066, "task_loss": 0.19051221013069153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4213418960571289, "epoch": 7.66, "learning_rate": 1.1677937447168217e-05, "loss": 0.3457, "step": 9067, "task_loss": 0.9177011251449585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5141689777374268, "epoch": 7.66, "learning_rate": 1.1673710904480135e-05, "loss": 0.4827, "step": 9068, "task_loss": 0.5235229730606079 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3967817425727844, "epoch": 7.67, "learning_rate": 1.1669484361792055e-05, "loss": 0.4485, "step": 9069, "task_loss": 0.23925037682056427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34805598855018616, "epoch": 7.67, "learning_rate": 1.1665257819103974e-05, "loss": 0.4355, "step": 9070, "task_loss": 0.8334130048751831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6072264313697815, "epoch": 7.67, "learning_rate": 1.1661031276415893e-05, "loss": 0.4336, "step": 9071, "task_loss": 0.21760205924510956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.536931037902832, "epoch": 7.67, "learning_rate": 1.165680473372781e-05, "loss": 0.346, "step": 9072, "task_loss": 0.5139984488487244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3435274660587311, "epoch": 7.67, "learning_rate": 1.165257819103973e-05, "loss": 0.5243, "step": 9073, "task_loss": 0.4083363115787506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4037647247314453, "epoch": 7.67, "learning_rate": 1.1648351648351648e-05, "loss": 0.3892, "step": 9074, "task_loss": 0.2817915678024292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2859882116317749, "epoch": 7.67, "learning_rate": 1.1644125105663568e-05, "loss": 0.4101, "step": 9075, "task_loss": 0.303874671459198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46814122796058655, "epoch": 7.67, "learning_rate": 1.1639898562975486e-05, "loss": 0.53, "step": 9076, "task_loss": 0.6683230400085449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4919140338897705, "epoch": 7.67, "learning_rate": 1.1635672020287406e-05, "loss": 0.5234, "step": 9077, "task_loss": 0.7466844320297241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3255879878997803, "epoch": 7.67, "learning_rate": 1.1631445477599324e-05, "loss": 0.3179, "step": 9078, "task_loss": 0.5697146654129028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44893962144851685, "epoch": 7.67, "learning_rate": 1.1627218934911244e-05, "loss": 0.4497, "step": 9079, "task_loss": 0.9178148508071899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28595617413520813, "epoch": 7.67, "learning_rate": 1.1622992392223162e-05, "loss": 0.4128, "step": 9080, "task_loss": 0.3437766432762146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29790395498275757, "epoch": 7.68, "learning_rate": 1.1618765849535081e-05, "loss": 0.3494, "step": 9081, "task_loss": 0.3793984055519104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35919150710105896, "epoch": 7.68, "learning_rate": 1.1614539306847e-05, "loss": 0.2733, "step": 9082, "task_loss": 1.0086805820465088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36165302991867065, "epoch": 7.68, "learning_rate": 1.161031276415892e-05, "loss": 0.3277, "step": 9083, "task_loss": 0.5117413997650146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4655190706253052, "epoch": 7.68, "learning_rate": 1.1606086221470837e-05, "loss": 0.382, "step": 9084, "task_loss": 0.49258899688720703 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35827407240867615, "epoch": 7.68, "learning_rate": 1.1601859678782755e-05, "loss": 0.4059, "step": 9085, "task_loss": 0.7047827243804932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5570493340492249, "epoch": 7.68, "learning_rate": 1.1597633136094675e-05, "loss": 0.4489, "step": 9086, "task_loss": 0.48567521572113037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28169572353363037, "epoch": 7.68, "learning_rate": 1.1593406593406595e-05, "loss": 0.4144, "step": 9087, "task_loss": 0.1504969447851181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42657098174095154, "epoch": 7.68, "learning_rate": 1.1589180050718513e-05, "loss": 0.4804, "step": 9088, "task_loss": 0.9591243267059326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.585813581943512, "epoch": 7.68, "learning_rate": 1.1584953508030431e-05, "loss": 0.4369, "step": 9089, "task_loss": 0.9274604320526123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4261697828769684, "epoch": 7.68, "learning_rate": 1.158072696534235e-05, "loss": 0.4439, "step": 9090, "task_loss": 0.80622798204422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4504546821117401, "epoch": 7.68, "learning_rate": 1.157650042265427e-05, "loss": 0.487, "step": 9091, "task_loss": 1.0099345445632935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4317294955253601, "epoch": 7.69, "learning_rate": 1.1572273879966188e-05, "loss": 0.3498, "step": 9092, "task_loss": 0.5249655842781067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2850191593170166, "epoch": 7.69, "learning_rate": 1.1568047337278106e-05, "loss": 0.4002, "step": 9093, "task_loss": 1.681335210800171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48124194145202637, "epoch": 7.69, "learning_rate": 1.1563820794590026e-05, "loss": 0.2992, "step": 9094, "task_loss": 0.43444743752479553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38189244270324707, "epoch": 7.69, "learning_rate": 1.1559594251901946e-05, "loss": 0.3871, "step": 9095, "task_loss": 0.600250244140625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26120293140411377, "epoch": 7.69, "learning_rate": 1.1555367709213864e-05, "loss": 0.3968, "step": 9096, "task_loss": 0.2040611058473587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2957070767879486, "epoch": 7.69, "learning_rate": 1.1551141166525782e-05, "loss": 0.449, "step": 9097, "task_loss": 0.41480839252471924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31503891944885254, "epoch": 7.69, "learning_rate": 1.1546914623837702e-05, "loss": 0.5554, "step": 9098, "task_loss": 0.9648346900939941 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5616080164909363, "epoch": 7.69, "learning_rate": 1.154268808114962e-05, "loss": 0.4653, "step": 9099, "task_loss": 0.39072370529174805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28910255432128906, "epoch": 7.69, "learning_rate": 1.153846153846154e-05, "loss": 0.3073, "step": 9100, "task_loss": 0.9958504438400269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3099061846733093, "epoch": 7.69, "learning_rate": 1.1534234995773458e-05, "loss": 0.322, "step": 9101, "task_loss": 0.23545800149440765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20131778717041016, "epoch": 7.69, "learning_rate": 1.1530008453085377e-05, "loss": 0.3606, "step": 9102, "task_loss": 0.21416786313056946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5189632773399353, "epoch": 7.69, "learning_rate": 1.1525781910397295e-05, "loss": 0.4709, "step": 9103, "task_loss": 1.019541621208191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.249685600399971, "epoch": 7.7, "learning_rate": 1.1521555367709215e-05, "loss": 0.3352, "step": 9104, "task_loss": 0.8379485011100769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30412718653678894, "epoch": 7.7, "learning_rate": 1.1517328825021133e-05, "loss": 0.2994, "step": 9105, "task_loss": 0.2942968010902405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19457846879959106, "epoch": 7.7, "learning_rate": 1.1513102282333051e-05, "loss": 0.3293, "step": 9106, "task_loss": 0.2578124701976776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38116198778152466, "epoch": 7.7, "learning_rate": 1.1508875739644971e-05, "loss": 0.3621, "step": 9107, "task_loss": 0.43714895844459534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3668285012245178, "epoch": 7.7, "learning_rate": 1.150464919695689e-05, "loss": 0.4831, "step": 9108, "task_loss": 0.22167527675628662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4199523329734802, "epoch": 7.7, "learning_rate": 1.1500422654268809e-05, "loss": 0.3866, "step": 9109, "task_loss": 0.8118433952331543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4506899118423462, "epoch": 7.7, "learning_rate": 1.1496196111580727e-05, "loss": 0.451, "step": 9110, "task_loss": 0.9876567125320435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25918519496917725, "epoch": 7.7, "learning_rate": 1.1491969568892646e-05, "loss": 0.3452, "step": 9111, "task_loss": 0.4457249343395233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4185684323310852, "epoch": 7.7, "learning_rate": 1.1487743026204566e-05, "loss": 0.3363, "step": 9112, "task_loss": 0.31305286288261414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17713600397109985, "epoch": 7.7, "learning_rate": 1.1483516483516484e-05, "loss": 0.2529, "step": 9113, "task_loss": 0.6266197562217712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.416693776845932, "epoch": 7.7, "learning_rate": 1.1479289940828402e-05, "loss": 0.469, "step": 9114, "task_loss": 0.3884761333465576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20577973127365112, "epoch": 7.7, "learning_rate": 1.1475063398140322e-05, "loss": 0.4823, "step": 9115, "task_loss": 0.6675399541854858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2792292535305023, "epoch": 7.71, "learning_rate": 1.1470836855452242e-05, "loss": 0.3514, "step": 9116, "task_loss": 0.2170674204826355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.644379198551178, "epoch": 7.71, "learning_rate": 1.1466610312764158e-05, "loss": 0.4996, "step": 9117, "task_loss": 0.6043576002120972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3493366241455078, "epoch": 7.71, "learning_rate": 1.1462383770076078e-05, "loss": 0.405, "step": 9118, "task_loss": 0.6413025856018066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4722885191440582, "epoch": 7.71, "learning_rate": 1.1458157227387998e-05, "loss": 0.4215, "step": 9119, "task_loss": 0.4085198640823364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27123552560806274, "epoch": 7.71, "learning_rate": 1.1453930684699916e-05, "loss": 0.3952, "step": 9120, "task_loss": 0.18777170777320862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2510784864425659, "epoch": 7.71, "learning_rate": 1.1449704142011834e-05, "loss": 0.369, "step": 9121, "task_loss": 0.6821796894073486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28577256202697754, "epoch": 7.71, "learning_rate": 1.1445477599323753e-05, "loss": 0.3817, "step": 9122, "task_loss": 0.797143280506134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29055824875831604, "epoch": 7.71, "learning_rate": 1.1441251056635673e-05, "loss": 0.4995, "step": 9123, "task_loss": 0.2320108562707901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43018925189971924, "epoch": 7.71, "learning_rate": 1.1437024513947591e-05, "loss": 0.3703, "step": 9124, "task_loss": 1.1138101816177368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4061700403690338, "epoch": 7.71, "learning_rate": 1.143279797125951e-05, "loss": 0.4537, "step": 9125, "task_loss": 0.40589240193367004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2310669720172882, "epoch": 7.71, "learning_rate": 1.1428571428571429e-05, "loss": 0.3284, "step": 9126, "task_loss": 0.3701762855052948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4331786036491394, "epoch": 7.71, "learning_rate": 1.1424344885883349e-05, "loss": 0.3767, "step": 9127, "task_loss": 0.25957462191581726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35757148265838623, "epoch": 7.72, "learning_rate": 1.1420118343195267e-05, "loss": 0.4102, "step": 9128, "task_loss": 0.4701923429965973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27205705642700195, "epoch": 7.72, "learning_rate": 1.1415891800507187e-05, "loss": 0.3504, "step": 9129, "task_loss": 0.8580575585365295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30253949761390686, "epoch": 7.72, "learning_rate": 1.1411665257819105e-05, "loss": 0.3541, "step": 9130, "task_loss": 0.3411189913749695 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.311810702085495, "epoch": 7.72, "learning_rate": 1.1407438715131023e-05, "loss": 0.4661, "step": 9131, "task_loss": 0.34442612528800964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37009477615356445, "epoch": 7.72, "learning_rate": 1.1403212172442942e-05, "loss": 0.3434, "step": 9132, "task_loss": 0.49225327372550964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18273411691188812, "epoch": 7.72, "learning_rate": 1.1398985629754862e-05, "loss": 0.4354, "step": 9133, "task_loss": 0.06520560383796692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3126942217350006, "epoch": 7.72, "learning_rate": 1.139475908706678e-05, "loss": 0.4715, "step": 9134, "task_loss": 0.8652362823486328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23995766043663025, "epoch": 7.72, "learning_rate": 1.1390532544378698e-05, "loss": 0.3971, "step": 9135, "task_loss": 0.9217087626457214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3310176432132721, "epoch": 7.72, "learning_rate": 1.1386306001690618e-05, "loss": 0.4764, "step": 9136, "task_loss": 0.9857785701751709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29179105162620544, "epoch": 7.72, "learning_rate": 1.1382079459002538e-05, "loss": 0.4123, "step": 9137, "task_loss": 0.035466741770505905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33696579933166504, "epoch": 7.72, "learning_rate": 1.1377852916314454e-05, "loss": 0.3377, "step": 9138, "task_loss": 0.30561596155166626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3339374363422394, "epoch": 7.72, "learning_rate": 1.1373626373626374e-05, "loss": 0.5173, "step": 9139, "task_loss": 0.6568589210510254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7359925508499146, "epoch": 7.73, "learning_rate": 1.1369399830938294e-05, "loss": 0.57, "step": 9140, "task_loss": 0.9451598525047302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3499267101287842, "epoch": 7.73, "learning_rate": 1.1365173288250212e-05, "loss": 0.3838, "step": 9141, "task_loss": 0.5426594018936157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33579370379447937, "epoch": 7.73, "learning_rate": 1.136094674556213e-05, "loss": 0.4124, "step": 9142, "task_loss": 0.8392094373703003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36561277508735657, "epoch": 7.73, "learning_rate": 1.135672020287405e-05, "loss": 0.5608, "step": 9143, "task_loss": 0.7134408354759216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32397326827049255, "epoch": 7.73, "learning_rate": 1.1352493660185969e-05, "loss": 0.4113, "step": 9144, "task_loss": 1.3163927793502808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42200130224227905, "epoch": 7.73, "learning_rate": 1.1348267117497887e-05, "loss": 0.3092, "step": 9145, "task_loss": 0.6292231678962708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2413388192653656, "epoch": 7.73, "learning_rate": 1.1344040574809805e-05, "loss": 0.309, "step": 9146, "task_loss": 0.23417268693447113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7151644229888916, "epoch": 7.73, "learning_rate": 1.1339814032121725e-05, "loss": 0.3718, "step": 9147, "task_loss": 0.6130445599555969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35301852226257324, "epoch": 7.73, "learning_rate": 1.1335587489433645e-05, "loss": 0.4311, "step": 9148, "task_loss": 0.8772244453430176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41327613592147827, "epoch": 7.73, "learning_rate": 1.1331360946745563e-05, "loss": 0.33, "step": 9149, "task_loss": 0.3825591206550598 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2858465313911438, "epoch": 7.73, "learning_rate": 1.132713440405748e-05, "loss": 0.3447, "step": 9150, "task_loss": 0.6044866442680359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.360195517539978, "epoch": 7.73, "learning_rate": 1.13229078613694e-05, "loss": 0.4387, "step": 9151, "task_loss": 1.3296058177947998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5096611380577087, "epoch": 7.74, "learning_rate": 1.1318681318681319e-05, "loss": 0.3822, "step": 9152, "task_loss": 0.41109979152679443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.375434935092926, "epoch": 7.74, "learning_rate": 1.1314454775993238e-05, "loss": 0.471, "step": 9153, "task_loss": 0.08674095571041107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28952664136886597, "epoch": 7.74, "learning_rate": 1.1310228233305156e-05, "loss": 0.3473, "step": 9154, "task_loss": 0.633596658706665 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49977564811706543, "epoch": 7.74, "learning_rate": 1.1306001690617076e-05, "loss": 0.4484, "step": 9155, "task_loss": 0.6473640203475952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31836628913879395, "epoch": 7.74, "learning_rate": 1.1301775147928994e-05, "loss": 0.3315, "step": 9156, "task_loss": 0.5128940939903259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39224445819854736, "epoch": 7.74, "learning_rate": 1.1297548605240914e-05, "loss": 0.4006, "step": 9157, "task_loss": 0.6004794239997864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2795179784297943, "epoch": 7.74, "learning_rate": 1.1293322062552834e-05, "loss": 0.3781, "step": 9158, "task_loss": 1.411116123199463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30549338459968567, "epoch": 7.74, "learning_rate": 1.1289095519864752e-05, "loss": 0.378, "step": 9159, "task_loss": 0.46297594904899597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.527697741985321, "epoch": 7.74, "learning_rate": 1.128486897717667e-05, "loss": 0.4234, "step": 9160, "task_loss": 0.6183108687400818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3122134804725647, "epoch": 7.74, "learning_rate": 1.128064243448859e-05, "loss": 0.452, "step": 9161, "task_loss": 0.4861637055873871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.453617662191391, "epoch": 7.74, "learning_rate": 1.1276415891800509e-05, "loss": 0.5029, "step": 9162, "task_loss": 1.1435117721557617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43289196491241455, "epoch": 7.75, "learning_rate": 1.1272189349112425e-05, "loss": 0.4259, "step": 9163, "task_loss": 0.2819502651691437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28781914710998535, "epoch": 7.75, "learning_rate": 1.1267962806424345e-05, "loss": 0.408, "step": 9164, "task_loss": 0.6944119930267334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2532561719417572, "epoch": 7.75, "learning_rate": 1.1263736263736265e-05, "loss": 0.3105, "step": 9165, "task_loss": 0.24177269637584686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1595858931541443, "epoch": 7.75, "learning_rate": 1.1259509721048183e-05, "loss": 0.283, "step": 9166, "task_loss": 0.057622428983449936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.602941632270813, "epoch": 7.75, "learning_rate": 1.1255283178360101e-05, "loss": 0.4609, "step": 9167, "task_loss": 1.3158483505249023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2732189893722534, "epoch": 7.75, "learning_rate": 1.125105663567202e-05, "loss": 0.3588, "step": 9168, "task_loss": 0.4310460388660431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23886744678020477, "epoch": 7.75, "learning_rate": 1.124683009298394e-05, "loss": 0.288, "step": 9169, "task_loss": 0.29517704248428345 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6346036195755005, "epoch": 7.75, "learning_rate": 1.1242603550295859e-05, "loss": 0.4723, "step": 9170, "task_loss": 1.07674241065979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4503568112850189, "epoch": 7.75, "learning_rate": 1.1238377007607777e-05, "loss": 0.4243, "step": 9171, "task_loss": 0.843457818031311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49291354417800903, "epoch": 7.75, "learning_rate": 1.1234150464919696e-05, "loss": 0.4708, "step": 9172, "task_loss": 0.3602895438671112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4171884059906006, "epoch": 7.75, "learning_rate": 1.1229923922231614e-05, "loss": 0.4086, "step": 9173, "task_loss": 0.33604615926742554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3416578769683838, "epoch": 7.75, "learning_rate": 1.1225697379543534e-05, "loss": 0.3545, "step": 9174, "task_loss": 0.47816404700279236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3527067303657532, "epoch": 7.76, "learning_rate": 1.1221470836855452e-05, "loss": 0.397, "step": 9175, "task_loss": 0.38562795519828796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5763547420501709, "epoch": 7.76, "learning_rate": 1.1217244294167372e-05, "loss": 0.4067, "step": 9176, "task_loss": 0.46796807646751404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2539387345314026, "epoch": 7.76, "learning_rate": 1.121301775147929e-05, "loss": 0.4237, "step": 9177, "task_loss": 0.447013795375824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41170811653137207, "epoch": 7.76, "learning_rate": 1.120879120879121e-05, "loss": 0.4114, "step": 9178, "task_loss": 0.5371567606925964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26679712533950806, "epoch": 7.76, "learning_rate": 1.1204564666103128e-05, "loss": 0.3533, "step": 9179, "task_loss": 0.3341583013534546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5373728275299072, "epoch": 7.76, "learning_rate": 1.1200338123415047e-05, "loss": 0.4948, "step": 9180, "task_loss": 0.5498508810997009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23268887400627136, "epoch": 7.76, "learning_rate": 1.1196111580726966e-05, "loss": 0.4503, "step": 9181, "task_loss": 0.30625489354133606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22331435978412628, "epoch": 7.76, "learning_rate": 1.1191885038038885e-05, "loss": 0.4099, "step": 9182, "task_loss": 0.6334330439567566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3860294818878174, "epoch": 7.76, "learning_rate": 1.1187658495350803e-05, "loss": 0.3311, "step": 9183, "task_loss": 0.9726356267929077 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39235448837280273, "epoch": 7.76, "learning_rate": 1.1183431952662721e-05, "loss": 0.3361, "step": 9184, "task_loss": 0.7921847701072693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3648669123649597, "epoch": 7.76, "learning_rate": 1.1179205409974641e-05, "loss": 0.412, "step": 9185, "task_loss": 0.4138137698173523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4430561661720276, "epoch": 7.76, "learning_rate": 1.117497886728656e-05, "loss": 0.327, "step": 9186, "task_loss": 0.777010440826416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3377532362937927, "epoch": 7.77, "learning_rate": 1.1170752324598479e-05, "loss": 0.3774, "step": 9187, "task_loss": 0.3540569245815277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2534089684486389, "epoch": 7.77, "learning_rate": 1.1166525781910397e-05, "loss": 0.3343, "step": 9188, "task_loss": 0.20269900560379028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2634657621383667, "epoch": 7.77, "learning_rate": 1.1162299239222317e-05, "loss": 0.3443, "step": 9189, "task_loss": 0.12720976769924164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2876060903072357, "epoch": 7.77, "learning_rate": 1.1158072696534236e-05, "loss": 0.4041, "step": 9190, "task_loss": 0.16513416171073914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6772826313972473, "epoch": 7.77, "learning_rate": 1.1153846153846154e-05, "loss": 0.3405, "step": 9191, "task_loss": 0.8949530124664307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29366540908813477, "epoch": 7.77, "learning_rate": 1.1149619611158072e-05, "loss": 0.3496, "step": 9192, "task_loss": 0.1253514289855957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38304272294044495, "epoch": 7.77, "learning_rate": 1.1145393068469992e-05, "loss": 0.3894, "step": 9193, "task_loss": 0.4890056848526001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5221858620643616, "epoch": 7.77, "learning_rate": 1.1141166525781912e-05, "loss": 0.4178, "step": 9194, "task_loss": 0.8837781548500061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1953422725200653, "epoch": 7.77, "learning_rate": 1.113693998309383e-05, "loss": 0.4303, "step": 9195, "task_loss": 0.48021215200424194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20505984127521515, "epoch": 7.77, "learning_rate": 1.1132713440405748e-05, "loss": 0.3264, "step": 9196, "task_loss": 0.8538981080055237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3823113739490509, "epoch": 7.77, "learning_rate": 1.1128486897717668e-05, "loss": 0.4258, "step": 9197, "task_loss": 0.9584882259368896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.436233788728714, "epoch": 7.77, "learning_rate": 1.1124260355029586e-05, "loss": 0.349, "step": 9198, "task_loss": 0.11669134348630905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27526962757110596, "epoch": 7.78, "learning_rate": 1.1120033812341506e-05, "loss": 0.3592, "step": 9199, "task_loss": 0.2844611406326294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3739403784275055, "epoch": 7.78, "learning_rate": 1.1115807269653424e-05, "loss": 0.4449, "step": 9200, "task_loss": 1.0262497663497925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33324164152145386, "epoch": 7.78, "learning_rate": 1.1111580726965343e-05, "loss": 0.4403, "step": 9201, "task_loss": 0.46012914180755615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2956051826477051, "epoch": 7.78, "learning_rate": 1.1107354184277261e-05, "loss": 0.3194, "step": 9202, "task_loss": 0.8431546092033386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42520707845687866, "epoch": 7.78, "learning_rate": 1.1103127641589181e-05, "loss": 0.3514, "step": 9203, "task_loss": 1.8556407690048218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3355869948863983, "epoch": 7.78, "learning_rate": 1.10989010989011e-05, "loss": 0.3233, "step": 9204, "task_loss": 0.34708765149116516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37278473377227783, "epoch": 7.78, "learning_rate": 1.1094674556213017e-05, "loss": 0.4773, "step": 9205, "task_loss": 0.42556461691856384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5236413478851318, "epoch": 7.78, "learning_rate": 1.1090448013524937e-05, "loss": 0.4396, "step": 9206, "task_loss": 0.5779834389686584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3706008195877075, "epoch": 7.78, "learning_rate": 1.1086221470836857e-05, "loss": 0.4711, "step": 9207, "task_loss": 0.6224589347839355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3697749972343445, "epoch": 7.78, "learning_rate": 1.1081994928148775e-05, "loss": 0.367, "step": 9208, "task_loss": 0.3714015781879425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38036292791366577, "epoch": 7.78, "learning_rate": 1.1077768385460693e-05, "loss": 0.4015, "step": 9209, "task_loss": 0.43919825553894043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44260141253471375, "epoch": 7.78, "learning_rate": 1.1073541842772613e-05, "loss": 0.4259, "step": 9210, "task_loss": 0.6497274041175842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47996985912323, "epoch": 7.79, "learning_rate": 1.1069315300084532e-05, "loss": 0.4164, "step": 9211, "task_loss": 0.3929479122161865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6918481588363647, "epoch": 7.79, "learning_rate": 1.106508875739645e-05, "loss": 0.5524, "step": 9212, "task_loss": 0.9093455076217651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30319127440452576, "epoch": 7.79, "learning_rate": 1.1060862214708368e-05, "loss": 0.373, "step": 9213, "task_loss": 0.055270709097385406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29538580775260925, "epoch": 7.79, "learning_rate": 1.1056635672020288e-05, "loss": 0.554, "step": 9214, "task_loss": 0.2552472949028015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3128582835197449, "epoch": 7.79, "learning_rate": 1.1052409129332208e-05, "loss": 0.352, "step": 9215, "task_loss": 0.24531985819339752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5729312896728516, "epoch": 7.79, "learning_rate": 1.1048182586644126e-05, "loss": 0.3462, "step": 9216, "task_loss": 0.6595221757888794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2506973147392273, "epoch": 7.79, "learning_rate": 1.1043956043956044e-05, "loss": 0.3554, "step": 9217, "task_loss": 0.49035191535949707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.605094850063324, "epoch": 7.79, "learning_rate": 1.1039729501267964e-05, "loss": 0.4557, "step": 9218, "task_loss": 0.6452468633651733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5192772746086121, "epoch": 7.79, "learning_rate": 1.1035502958579882e-05, "loss": 0.3782, "step": 9219, "task_loss": 0.08161477744579315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1927189826965332, "epoch": 7.79, "learning_rate": 1.1031276415891801e-05, "loss": 0.2816, "step": 9220, "task_loss": 0.5210713744163513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3268837332725525, "epoch": 7.79, "learning_rate": 1.102704987320372e-05, "loss": 0.437, "step": 9221, "task_loss": 0.2795208692550659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38489678502082825, "epoch": 7.79, "learning_rate": 1.102282333051564e-05, "loss": 0.3736, "step": 9222, "task_loss": 1.000780463218689 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3962850570678711, "epoch": 7.8, "learning_rate": 1.1018596787827557e-05, "loss": 0.3243, "step": 9223, "task_loss": 0.8263521194458008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19328072667121887, "epoch": 7.8, "learning_rate": 1.1014370245139477e-05, "loss": 0.3702, "step": 9224, "task_loss": 0.7977550029754639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.782816469669342, "epoch": 7.8, "learning_rate": 1.1010143702451395e-05, "loss": 0.4765, "step": 9225, "task_loss": 0.9350370168685913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6613709926605225, "epoch": 7.8, "learning_rate": 1.1005917159763315e-05, "loss": 0.527, "step": 9226, "task_loss": 1.4070037603378296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4617674946784973, "epoch": 7.8, "learning_rate": 1.1001690617075233e-05, "loss": 0.4533, "step": 9227, "task_loss": 1.694656491279602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3281554877758026, "epoch": 7.8, "learning_rate": 1.0997464074387153e-05, "loss": 0.4368, "step": 9228, "task_loss": 0.40840476751327515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4342308044433594, "epoch": 7.8, "learning_rate": 1.099323753169907e-05, "loss": 0.3637, "step": 9229, "task_loss": 0.6556279063224792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36412039399147034, "epoch": 7.8, "learning_rate": 1.0989010989010989e-05, "loss": 0.4814, "step": 9230, "task_loss": 0.44055911898612976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4204012453556061, "epoch": 7.8, "learning_rate": 1.0984784446322908e-05, "loss": 0.3378, "step": 9231, "task_loss": 0.2857893705368042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5335394144058228, "epoch": 7.8, "learning_rate": 1.0980557903634828e-05, "loss": 0.4346, "step": 9232, "task_loss": 0.5185115337371826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3753792643547058, "epoch": 7.8, "learning_rate": 1.0976331360946746e-05, "loss": 0.3553, "step": 9233, "task_loss": 0.9264285564422607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20593991875648499, "epoch": 7.81, "learning_rate": 1.0972104818258664e-05, "loss": 0.3257, "step": 9234, "task_loss": 0.3912558853626251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2786403000354767, "epoch": 7.81, "learning_rate": 1.0967878275570584e-05, "loss": 0.4595, "step": 9235, "task_loss": 0.7157214283943176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4143175482749939, "epoch": 7.81, "learning_rate": 1.0963651732882504e-05, "loss": 0.4229, "step": 9236, "task_loss": 0.24216699600219727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.50058913230896, "epoch": 7.81, "learning_rate": 1.095942519019442e-05, "loss": 0.3806, "step": 9237, "task_loss": 1.2244868278503418 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3064241111278534, "epoch": 7.81, "learning_rate": 1.095519864750634e-05, "loss": 0.3839, "step": 9238, "task_loss": 0.25598084926605225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4368259012699127, "epoch": 7.81, "learning_rate": 1.095097210481826e-05, "loss": 0.2959, "step": 9239, "task_loss": 0.39154309034347534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.10724813491106033, "epoch": 7.81, "learning_rate": 1.094674556213018e-05, "loss": 0.4351, "step": 9240, "task_loss": 0.007359111215919256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23296202719211578, "epoch": 7.81, "learning_rate": 1.0942519019442096e-05, "loss": 0.4339, "step": 9241, "task_loss": 0.09631559997797012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43050095438957214, "epoch": 7.81, "learning_rate": 1.0938292476754015e-05, "loss": 0.3819, "step": 9242, "task_loss": 0.5619154572486877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24690952897071838, "epoch": 7.81, "learning_rate": 1.0934065934065935e-05, "loss": 0.3671, "step": 9243, "task_loss": 0.3900529146194458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3097602427005768, "epoch": 7.81, "learning_rate": 1.0929839391377853e-05, "loss": 0.3543, "step": 9244, "task_loss": 0.7247515320777893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6165492534637451, "epoch": 7.81, "learning_rate": 1.0925612848689773e-05, "loss": 0.5918, "step": 9245, "task_loss": 0.8551908731460571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2913127839565277, "epoch": 7.82, "learning_rate": 1.0921386306001691e-05, "loss": 0.4532, "step": 9246, "task_loss": 0.057152606546878815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.343362957239151, "epoch": 7.82, "learning_rate": 1.091715976331361e-05, "loss": 0.392, "step": 9247, "task_loss": 1.0392510890960693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3892924189567566, "epoch": 7.82, "learning_rate": 1.0912933220625529e-05, "loss": 0.4225, "step": 9248, "task_loss": 0.7380716800689697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5509169101715088, "epoch": 7.82, "learning_rate": 1.0908706677937448e-05, "loss": 0.4196, "step": 9249, "task_loss": 1.1596014499664307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3979646563529968, "epoch": 7.82, "learning_rate": 1.0904480135249366e-05, "loss": 0.4497, "step": 9250, "task_loss": 0.6002991795539856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25178012251853943, "epoch": 7.82, "learning_rate": 1.0900253592561285e-05, "loss": 0.5752, "step": 9251, "task_loss": 0.5575987100601196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27283474802970886, "epoch": 7.82, "learning_rate": 1.0896027049873204e-05, "loss": 0.3042, "step": 9252, "task_loss": 0.8232729434967041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33235543966293335, "epoch": 7.82, "learning_rate": 1.0891800507185124e-05, "loss": 0.3792, "step": 9253, "task_loss": 0.41671988368034363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27926045656204224, "epoch": 7.82, "learning_rate": 1.0887573964497042e-05, "loss": 0.3499, "step": 9254, "task_loss": 0.3936062157154083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35859912633895874, "epoch": 7.82, "learning_rate": 1.088334742180896e-05, "loss": 0.2846, "step": 9255, "task_loss": 0.7740437388420105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17286600172519684, "epoch": 7.82, "learning_rate": 1.087912087912088e-05, "loss": 0.2555, "step": 9256, "task_loss": 0.18293415009975433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36977535486221313, "epoch": 7.82, "learning_rate": 1.08748943364328e-05, "loss": 0.3573, "step": 9257, "task_loss": 0.9208285808563232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.326702356338501, "epoch": 7.83, "learning_rate": 1.0870667793744718e-05, "loss": 0.3987, "step": 9258, "task_loss": 0.23134936392307281 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5045897364616394, "epoch": 7.83, "learning_rate": 1.0866441251056636e-05, "loss": 0.462, "step": 9259, "task_loss": 0.6422290205955505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4039265513420105, "epoch": 7.83, "learning_rate": 1.0862214708368555e-05, "loss": 0.4371, "step": 9260, "task_loss": 0.5250632166862488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5410283803939819, "epoch": 7.83, "learning_rate": 1.0857988165680475e-05, "loss": 0.3873, "step": 9261, "task_loss": 0.8205486536026001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3859993517398834, "epoch": 7.83, "learning_rate": 1.0853761622992391e-05, "loss": 0.478, "step": 9262, "task_loss": 0.8574113845825195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4508819282054901, "epoch": 7.83, "learning_rate": 1.0849535080304311e-05, "loss": 0.3396, "step": 9263, "task_loss": 0.7825176119804382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3347051441669464, "epoch": 7.83, "learning_rate": 1.0845308537616231e-05, "loss": 0.2939, "step": 9264, "task_loss": 0.7098431587219238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23411542177200317, "epoch": 7.83, "learning_rate": 1.0841081994928149e-05, "loss": 0.4148, "step": 9265, "task_loss": 0.46298152208328247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4776533544063568, "epoch": 7.83, "learning_rate": 1.0836855452240067e-05, "loss": 0.4466, "step": 9266, "task_loss": 0.21664302051067352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3806421756744385, "epoch": 7.83, "learning_rate": 1.0832628909551987e-05, "loss": 0.3549, "step": 9267, "task_loss": 0.9924259781837463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3687785267829895, "epoch": 7.83, "learning_rate": 1.0828402366863907e-05, "loss": 0.3995, "step": 9268, "task_loss": 1.108720064163208 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24215102195739746, "epoch": 7.83, "learning_rate": 1.0824175824175825e-05, "loss": 0.2925, "step": 9269, "task_loss": 0.16937589645385742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45767706632614136, "epoch": 7.84, "learning_rate": 1.0819949281487743e-05, "loss": 0.4664, "step": 9270, "task_loss": 0.51558917760849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3040665090084076, "epoch": 7.84, "learning_rate": 1.0815722738799662e-05, "loss": 0.381, "step": 9271, "task_loss": 0.621455192565918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6382224559783936, "epoch": 7.84, "learning_rate": 1.0811496196111582e-05, "loss": 0.5193, "step": 9272, "task_loss": 1.1030546426773071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.436740905046463, "epoch": 7.84, "learning_rate": 1.08072696534235e-05, "loss": 0.4669, "step": 9273, "task_loss": 0.46199971437454224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2162485420703888, "epoch": 7.84, "learning_rate": 1.080304311073542e-05, "loss": 0.2986, "step": 9274, "task_loss": 0.7285430431365967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7007200717926025, "epoch": 7.84, "learning_rate": 1.0798816568047338e-05, "loss": 0.5333, "step": 9275, "task_loss": 1.3763097524642944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33861714601516724, "epoch": 7.84, "learning_rate": 1.0794590025359256e-05, "loss": 0.3324, "step": 9276, "task_loss": 0.49422213435173035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41579142212867737, "epoch": 7.84, "learning_rate": 1.0790363482671176e-05, "loss": 0.3838, "step": 9277, "task_loss": 0.4389484226703644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39072635769844055, "epoch": 7.84, "learning_rate": 1.0786136939983095e-05, "loss": 0.3572, "step": 9278, "task_loss": 0.9798759818077087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4454267621040344, "epoch": 7.84, "learning_rate": 1.0781910397295014e-05, "loss": 0.4148, "step": 9279, "task_loss": 0.9816865921020508 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3471112847328186, "epoch": 7.84, "learning_rate": 1.0777683854606932e-05, "loss": 0.3311, "step": 9280, "task_loss": 0.7161145210266113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3434704840183258, "epoch": 7.84, "learning_rate": 1.0773457311918851e-05, "loss": 0.2772, "step": 9281, "task_loss": 0.3031379282474518 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2679635286331177, "epoch": 7.85, "learning_rate": 1.0769230769230771e-05, "loss": 0.3875, "step": 9282, "task_loss": 0.33075010776519775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3778381943702698, "epoch": 7.85, "learning_rate": 1.0765004226542687e-05, "loss": 0.2594, "step": 9283, "task_loss": 0.24884802103042603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6224073767662048, "epoch": 7.85, "learning_rate": 1.0760777683854607e-05, "loss": 0.412, "step": 9284, "task_loss": 1.0142754316329956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3548678457736969, "epoch": 7.85, "learning_rate": 1.0756551141166527e-05, "loss": 0.4708, "step": 9285, "task_loss": 0.3485974371433258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2104184627532959, "epoch": 7.85, "learning_rate": 1.0752324598478445e-05, "loss": 0.3173, "step": 9286, "task_loss": 0.19081072509288788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3008164167404175, "epoch": 7.85, "learning_rate": 1.0748098055790363e-05, "loss": 0.3849, "step": 9287, "task_loss": 0.10594374686479568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39720824360847473, "epoch": 7.85, "learning_rate": 1.0743871513102283e-05, "loss": 0.3885, "step": 9288, "task_loss": 0.7695749402046204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35902607440948486, "epoch": 7.85, "learning_rate": 1.0739644970414202e-05, "loss": 0.3734, "step": 9289, "task_loss": 0.2672860622406006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3068811297416687, "epoch": 7.85, "learning_rate": 1.073541842772612e-05, "loss": 0.4346, "step": 9290, "task_loss": 0.7562078237533569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22187155485153198, "epoch": 7.85, "learning_rate": 1.0731191885038039e-05, "loss": 0.2723, "step": 9291, "task_loss": 0.38457921147346497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33657142519950867, "epoch": 7.85, "learning_rate": 1.0726965342349958e-05, "loss": 0.2996, "step": 9292, "task_loss": 0.9042322635650635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5300925970077515, "epoch": 7.85, "learning_rate": 1.0722738799661878e-05, "loss": 0.4183, "step": 9293, "task_loss": 0.6678743958473206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28959164023399353, "epoch": 7.86, "learning_rate": 1.0718512256973796e-05, "loss": 0.3758, "step": 9294, "task_loss": 0.8730827569961548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3192163109779358, "epoch": 7.86, "learning_rate": 1.0714285714285714e-05, "loss": 0.3226, "step": 9295, "task_loss": 0.7750691771507263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24617919325828552, "epoch": 7.86, "learning_rate": 1.0710059171597634e-05, "loss": 0.3409, "step": 9296, "task_loss": 0.3111129701137543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5864434242248535, "epoch": 7.86, "learning_rate": 1.0705832628909552e-05, "loss": 0.4895, "step": 9297, "task_loss": 0.3661956191062927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41226378083229065, "epoch": 7.86, "learning_rate": 1.0701606086221472e-05, "loss": 0.3716, "step": 9298, "task_loss": 1.0340471267700195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5685677528381348, "epoch": 7.86, "learning_rate": 1.069737954353339e-05, "loss": 0.4621, "step": 9299, "task_loss": 0.9203925132751465 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.390229195356369, "epoch": 7.86, "learning_rate": 1.069315300084531e-05, "loss": 0.3804, "step": 9300, "task_loss": 0.5961461663246155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4006960391998291, "epoch": 7.86, "learning_rate": 1.0688926458157227e-05, "loss": 0.3864, "step": 9301, "task_loss": 1.086032748222351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4856417179107666, "epoch": 7.86, "learning_rate": 1.0684699915469147e-05, "loss": 0.3861, "step": 9302, "task_loss": 0.8427696824073792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4628952741622925, "epoch": 7.86, "learning_rate": 1.0680473372781065e-05, "loss": 0.3843, "step": 9303, "task_loss": 0.6110355257987976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27146050333976746, "epoch": 7.86, "learning_rate": 1.0676246830092985e-05, "loss": 0.4831, "step": 9304, "task_loss": 0.5670109987258911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4627695083618164, "epoch": 7.87, "learning_rate": 1.0672020287404903e-05, "loss": 0.4282, "step": 9305, "task_loss": 0.26529747247695923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2669185698032379, "epoch": 7.87, "learning_rate": 1.0667793744716823e-05, "loss": 0.4186, "step": 9306, "task_loss": 0.7672763466835022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2834002375602722, "epoch": 7.87, "learning_rate": 1.0663567202028742e-05, "loss": 0.3756, "step": 9307, "task_loss": 0.3437587320804596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5161275863647461, "epoch": 7.87, "learning_rate": 1.0659340659340659e-05, "loss": 0.3767, "step": 9308, "task_loss": 0.9973774552345276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34854525327682495, "epoch": 7.87, "learning_rate": 1.0655114116652579e-05, "loss": 0.4634, "step": 9309, "task_loss": 0.3313879668712616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2869054973125458, "epoch": 7.87, "learning_rate": 1.0650887573964498e-05, "loss": 0.449, "step": 9310, "task_loss": 0.50322026014328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39097028970718384, "epoch": 7.87, "learning_rate": 1.0646661031276416e-05, "loss": 0.4056, "step": 9311, "task_loss": 0.6332195401191711 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46916520595550537, "epoch": 7.87, "learning_rate": 1.0642434488588334e-05, "loss": 0.564, "step": 9312, "task_loss": 0.6158884167671204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21863512694835663, "epoch": 7.87, "learning_rate": 1.0638207945900254e-05, "loss": 0.3167, "step": 9313, "task_loss": 0.37384873628616333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25401031970977783, "epoch": 7.87, "learning_rate": 1.0633981403212174e-05, "loss": 0.4379, "step": 9314, "task_loss": 0.6856475472450256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43018198013305664, "epoch": 7.87, "learning_rate": 1.0629754860524092e-05, "loss": 0.458, "step": 9315, "task_loss": 0.9519250988960266 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28802549839019775, "epoch": 7.87, "learning_rate": 1.062552831783601e-05, "loss": 0.4003, "step": 9316, "task_loss": 0.8300285935401917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35494333505630493, "epoch": 7.88, "learning_rate": 1.062130177514793e-05, "loss": 0.4602, "step": 9317, "task_loss": 0.5488393306732178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28769451379776, "epoch": 7.88, "learning_rate": 1.0617075232459848e-05, "loss": 0.3754, "step": 9318, "task_loss": 0.5718137621879578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26615551114082336, "epoch": 7.88, "learning_rate": 1.0612848689771767e-05, "loss": 0.3624, "step": 9319, "task_loss": 0.8932967782020569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28873610496520996, "epoch": 7.88, "learning_rate": 1.0608622147083686e-05, "loss": 0.3625, "step": 9320, "task_loss": 0.8059813380241394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3873116970062256, "epoch": 7.88, "learning_rate": 1.0604395604395605e-05, "loss": 0.4202, "step": 9321, "task_loss": 0.3786969482898712 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5934972167015076, "epoch": 7.88, "learning_rate": 1.0600169061707523e-05, "loss": 0.4451, "step": 9322, "task_loss": 0.5832181572914124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24798718094825745, "epoch": 7.88, "learning_rate": 1.0595942519019443e-05, "loss": 0.3366, "step": 9323, "task_loss": 0.44672125577926636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30515140295028687, "epoch": 7.88, "learning_rate": 1.0591715976331361e-05, "loss": 0.4542, "step": 9324, "task_loss": 1.3762881755828857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3264380097389221, "epoch": 7.88, "learning_rate": 1.058748943364328e-05, "loss": 0.3329, "step": 9325, "task_loss": 0.14306579530239105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29918310046195984, "epoch": 7.88, "learning_rate": 1.0583262890955199e-05, "loss": 0.3892, "step": 9326, "task_loss": 0.3864036798477173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3965027928352356, "epoch": 7.88, "learning_rate": 1.0579036348267119e-05, "loss": 0.4572, "step": 9327, "task_loss": 0.41052335500717163 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.393541157245636, "epoch": 7.88, "learning_rate": 1.0574809805579037e-05, "loss": 0.3673, "step": 9328, "task_loss": 0.9022150039672852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20806947350502014, "epoch": 7.89, "learning_rate": 1.0570583262890955e-05, "loss": 0.3604, "step": 9329, "task_loss": 0.6552309989929199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21843647956848145, "epoch": 7.89, "learning_rate": 1.0566356720202874e-05, "loss": 0.3247, "step": 9330, "task_loss": 0.7550066709518433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3441922068595886, "epoch": 7.89, "learning_rate": 1.0562130177514794e-05, "loss": 0.3912, "step": 9331, "task_loss": 0.32267022132873535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2799738049507141, "epoch": 7.89, "learning_rate": 1.0557903634826712e-05, "loss": 0.3771, "step": 9332, "task_loss": 0.6509575247764587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48090341687202454, "epoch": 7.89, "learning_rate": 1.055367709213863e-05, "loss": 0.38, "step": 9333, "task_loss": 1.0719225406646729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.345784068107605, "epoch": 7.89, "learning_rate": 1.054945054945055e-05, "loss": 0.3091, "step": 9334, "task_loss": 0.705093502998352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31461939215660095, "epoch": 7.89, "learning_rate": 1.054522400676247e-05, "loss": 0.4917, "step": 9335, "task_loss": 0.3675152063369751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2938218116760254, "epoch": 7.89, "learning_rate": 1.0540997464074388e-05, "loss": 0.4663, "step": 9336, "task_loss": 1.1813033819198608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3476431667804718, "epoch": 7.89, "learning_rate": 1.0536770921386306e-05, "loss": 0.289, "step": 9337, "task_loss": 0.6157872080802917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.668760359287262, "epoch": 7.89, "learning_rate": 1.0532544378698226e-05, "loss": 0.5169, "step": 9338, "task_loss": 1.2601101398468018 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36466726660728455, "epoch": 7.89, "learning_rate": 1.0528317836010145e-05, "loss": 0.383, "step": 9339, "task_loss": 1.520774245262146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49753037095069885, "epoch": 7.89, "learning_rate": 1.0524091293322063e-05, "loss": 0.3962, "step": 9340, "task_loss": 1.120042085647583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40975221991539, "epoch": 7.9, "learning_rate": 1.0519864750633981e-05, "loss": 0.384, "step": 9341, "task_loss": 0.46020108461380005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22798791527748108, "epoch": 7.9, "learning_rate": 1.0515638207945901e-05, "loss": 0.4763, "step": 9342, "task_loss": 0.4292354881763458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27462202310562134, "epoch": 7.9, "learning_rate": 1.051141166525782e-05, "loss": 0.3773, "step": 9343, "task_loss": 0.36112216114997864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3133104741573334, "epoch": 7.9, "learning_rate": 1.0507185122569739e-05, "loss": 0.4259, "step": 9344, "task_loss": 0.443113774061203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3287684917449951, "epoch": 7.9, "learning_rate": 1.0502958579881657e-05, "loss": 0.3504, "step": 9345, "task_loss": 0.07360823452472687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29685765504837036, "epoch": 7.9, "learning_rate": 1.0498732037193577e-05, "loss": 0.4172, "step": 9346, "task_loss": 0.8013165593147278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.306978315114975, "epoch": 7.9, "learning_rate": 1.0494505494505495e-05, "loss": 0.3974, "step": 9347, "task_loss": 0.6382566094398499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3207901120185852, "epoch": 7.9, "learning_rate": 1.0490278951817414e-05, "loss": 0.3409, "step": 9348, "task_loss": 1.107635259628296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19824336469173431, "epoch": 7.9, "learning_rate": 1.0486052409129333e-05, "loss": 0.2463, "step": 9349, "task_loss": 0.5026869773864746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19186586141586304, "epoch": 7.9, "learning_rate": 1.048182586644125e-05, "loss": 0.3904, "step": 9350, "task_loss": 0.43056154251098633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3297847509384155, "epoch": 7.9, "learning_rate": 1.047759932375317e-05, "loss": 0.3672, "step": 9351, "task_loss": 0.3292813301086426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3019562363624573, "epoch": 7.9, "learning_rate": 1.047337278106509e-05, "loss": 0.4057, "step": 9352, "task_loss": 0.2962067723274231 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33273234963417053, "epoch": 7.91, "learning_rate": 1.0469146238377008e-05, "loss": 0.3189, "step": 9353, "task_loss": 0.3907542824745178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2746749222278595, "epoch": 7.91, "learning_rate": 1.0464919695688926e-05, "loss": 0.5712, "step": 9354, "task_loss": 0.7671428918838501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3933637738227844, "epoch": 7.91, "learning_rate": 1.0460693153000846e-05, "loss": 0.4262, "step": 9355, "task_loss": 0.5202326774597168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3578820526599884, "epoch": 7.91, "learning_rate": 1.0456466610312766e-05, "loss": 0.3935, "step": 9356, "task_loss": 0.7592084407806396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5406107902526855, "epoch": 7.91, "learning_rate": 1.0452240067624684e-05, "loss": 0.4077, "step": 9357, "task_loss": 0.5751054286956787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2250983715057373, "epoch": 7.91, "learning_rate": 1.0448013524936602e-05, "loss": 0.3505, "step": 9358, "task_loss": 0.14698077738285065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5378384590148926, "epoch": 7.91, "learning_rate": 1.0443786982248521e-05, "loss": 0.4738, "step": 9359, "task_loss": 0.18897125124931335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4620664119720459, "epoch": 7.91, "learning_rate": 1.0439560439560441e-05, "loss": 0.4251, "step": 9360, "task_loss": 0.6229583621025085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4064135253429413, "epoch": 7.91, "learning_rate": 1.0435333896872358e-05, "loss": 0.3698, "step": 9361, "task_loss": 0.8910341858863831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1648712158203125, "epoch": 7.91, "learning_rate": 1.0431107354184277e-05, "loss": 0.3465, "step": 9362, "task_loss": 0.6734543442726135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19563442468643188, "epoch": 7.91, "learning_rate": 1.0426880811496197e-05, "loss": 0.2921, "step": 9363, "task_loss": 0.30024436116218567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3703368008136749, "epoch": 7.91, "learning_rate": 1.0422654268808115e-05, "loss": 0.3696, "step": 9364, "task_loss": 0.2711583077907562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3399535119533539, "epoch": 7.92, "learning_rate": 1.0418427726120035e-05, "loss": 0.2639, "step": 9365, "task_loss": 0.30582135915756226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35873204469680786, "epoch": 7.92, "learning_rate": 1.0414201183431953e-05, "loss": 0.3214, "step": 9366, "task_loss": 0.668890118598938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3131570518016815, "epoch": 7.92, "learning_rate": 1.0409974640743873e-05, "loss": 0.2867, "step": 9367, "task_loss": 0.2604713439941406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3699115812778473, "epoch": 7.92, "learning_rate": 1.040574809805579e-05, "loss": 0.3903, "step": 9368, "task_loss": 0.6998211741447449 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33093830943107605, "epoch": 7.92, "learning_rate": 1.040152155536771e-05, "loss": 0.3774, "step": 9369, "task_loss": 0.6415210962295532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.237060546875, "epoch": 7.92, "learning_rate": 1.0397295012679628e-05, "loss": 0.3896, "step": 9370, "task_loss": 0.546448290348053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4397224187850952, "epoch": 7.92, "learning_rate": 1.0393068469991548e-05, "loss": 0.3842, "step": 9371, "task_loss": 0.7050089836120605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26140260696411133, "epoch": 7.92, "learning_rate": 1.0388841927303466e-05, "loss": 0.329, "step": 9372, "task_loss": 0.23296354711055756 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41678908467292786, "epoch": 7.92, "learning_rate": 1.0384615384615386e-05, "loss": 0.3816, "step": 9373, "task_loss": 1.048256516456604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39260610938072205, "epoch": 7.92, "learning_rate": 1.0380388841927304e-05, "loss": 0.3812, "step": 9374, "task_loss": 1.2521941661834717 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4431672990322113, "epoch": 7.92, "learning_rate": 1.0376162299239222e-05, "loss": 0.4888, "step": 9375, "task_loss": 0.5465877652168274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.56330406665802, "epoch": 7.93, "learning_rate": 1.0371935756551142e-05, "loss": 0.4941, "step": 9376, "task_loss": 1.1811200380325317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3872709274291992, "epoch": 7.93, "learning_rate": 1.0367709213863061e-05, "loss": 0.5543, "step": 9377, "task_loss": 0.3980772793292999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42802882194519043, "epoch": 7.93, "learning_rate": 1.036348267117498e-05, "loss": 0.3673, "step": 9378, "task_loss": 0.4631832242012024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27570968866348267, "epoch": 7.93, "learning_rate": 1.0359256128486898e-05, "loss": 0.3618, "step": 9379, "task_loss": 1.0667927265167236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2934645414352417, "epoch": 7.93, "learning_rate": 1.0355029585798817e-05, "loss": 0.3589, "step": 9380, "task_loss": 1.0715514421463013 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46468454599380493, "epoch": 7.93, "learning_rate": 1.0350803043110737e-05, "loss": 0.3422, "step": 9381, "task_loss": 0.45326223969459534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36552029848098755, "epoch": 7.93, "learning_rate": 1.0346576500422653e-05, "loss": 0.3657, "step": 9382, "task_loss": 0.6220387816429138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32779645919799805, "epoch": 7.93, "learning_rate": 1.0342349957734573e-05, "loss": 0.3138, "step": 9383, "task_loss": 0.24285943806171417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3008289933204651, "epoch": 7.93, "learning_rate": 1.0338123415046493e-05, "loss": 0.3984, "step": 9384, "task_loss": 0.6523308157920837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2053653746843338, "epoch": 7.93, "learning_rate": 1.0333896872358411e-05, "loss": 0.3291, "step": 9385, "task_loss": 0.6391233801841736 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2764323949813843, "epoch": 7.93, "learning_rate": 1.0329670329670329e-05, "loss": 0.3431, "step": 9386, "task_loss": 0.8804179430007935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.58913254737854, "epoch": 7.93, "learning_rate": 1.0325443786982249e-05, "loss": 0.3834, "step": 9387, "task_loss": 0.23427382111549377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6203899383544922, "epoch": 7.94, "learning_rate": 1.0321217244294168e-05, "loss": 0.4805, "step": 9388, "task_loss": 0.8846639394760132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.353671133518219, "epoch": 7.94, "learning_rate": 1.0316990701606086e-05, "loss": 0.3084, "step": 9389, "task_loss": 0.30331912636756897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25826042890548706, "epoch": 7.94, "learning_rate": 1.0312764158918005e-05, "loss": 0.3345, "step": 9390, "task_loss": 0.4506385922431946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40124833583831787, "epoch": 7.94, "learning_rate": 1.0308537616229924e-05, "loss": 0.3399, "step": 9391, "task_loss": 0.46779727935791016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.183677077293396, "epoch": 7.94, "learning_rate": 1.0304311073541844e-05, "loss": 0.3317, "step": 9392, "task_loss": 0.4071427881717682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46816083788871765, "epoch": 7.94, "learning_rate": 1.0300084530853762e-05, "loss": 0.4183, "step": 9393, "task_loss": 0.6161310076713562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22236217558383942, "epoch": 7.94, "learning_rate": 1.0295857988165682e-05, "loss": 0.3654, "step": 9394, "task_loss": 0.42631030082702637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5708345174789429, "epoch": 7.94, "learning_rate": 1.02916314454776e-05, "loss": 0.42, "step": 9395, "task_loss": 0.6909058690071106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46304288506507874, "epoch": 7.94, "learning_rate": 1.0287404902789518e-05, "loss": 0.4389, "step": 9396, "task_loss": 0.8769976496696472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3119366765022278, "epoch": 7.94, "learning_rate": 1.0283178360101438e-05, "loss": 0.2819, "step": 9397, "task_loss": 0.44786685705184937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35893315076828003, "epoch": 7.94, "learning_rate": 1.0278951817413357e-05, "loss": 0.3177, "step": 9398, "task_loss": 0.8647464513778687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19748623669147491, "epoch": 7.94, "learning_rate": 1.0274725274725275e-05, "loss": 0.3147, "step": 9399, "task_loss": 0.24764443933963776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6548997759819031, "epoch": 7.95, "learning_rate": 1.0270498732037193e-05, "loss": 0.4937, "step": 9400, "task_loss": 0.5669742822647095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29760482907295227, "epoch": 7.95, "learning_rate": 1.0266272189349113e-05, "loss": 0.3946, "step": 9401, "task_loss": 0.3779524564743042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31724005937576294, "epoch": 7.95, "learning_rate": 1.0262045646661033e-05, "loss": 0.2924, "step": 9402, "task_loss": 0.329680472612381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3954460620880127, "epoch": 7.95, "learning_rate": 1.0257819103972951e-05, "loss": 0.4274, "step": 9403, "task_loss": 0.7274028658866882 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41761332750320435, "epoch": 7.95, "learning_rate": 1.0253592561284869e-05, "loss": 0.3525, "step": 9404, "task_loss": 0.7435610294342041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4076928496360779, "epoch": 7.95, "learning_rate": 1.0249366018596789e-05, "loss": 0.4311, "step": 9405, "task_loss": 0.7442341446876526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21257227659225464, "epoch": 7.95, "learning_rate": 1.0245139475908708e-05, "loss": 0.289, "step": 9406, "task_loss": 0.09206889569759369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2921108901500702, "epoch": 7.95, "learning_rate": 1.0240912933220625e-05, "loss": 0.3723, "step": 9407, "task_loss": 0.3296409547328949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2768428325653076, "epoch": 7.95, "learning_rate": 1.0236686390532545e-05, "loss": 0.4623, "step": 9408, "task_loss": 1.0802311897277832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4046192169189453, "epoch": 7.95, "learning_rate": 1.0232459847844464e-05, "loss": 0.4377, "step": 9409, "task_loss": 0.5437915921211243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27469420433044434, "epoch": 7.95, "learning_rate": 1.0228233305156382e-05, "loss": 0.3587, "step": 9410, "task_loss": 0.22336819767951965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2558116912841797, "epoch": 7.95, "learning_rate": 1.02240067624683e-05, "loss": 0.3903, "step": 9411, "task_loss": 0.0908508151769638 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2631528973579407, "epoch": 7.96, "learning_rate": 1.021978021978022e-05, "loss": 0.4251, "step": 9412, "task_loss": 0.45004695653915405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14367088675498962, "epoch": 7.96, "learning_rate": 1.021555367709214e-05, "loss": 0.2798, "step": 9413, "task_loss": 0.2983837425708771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.354820191860199, "epoch": 7.96, "learning_rate": 1.0211327134404058e-05, "loss": 0.4796, "step": 9414, "task_loss": 0.3660299777984619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3306651711463928, "epoch": 7.96, "learning_rate": 1.0207100591715976e-05, "loss": 0.4183, "step": 9415, "task_loss": 0.7402946949005127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3089539706707001, "epoch": 7.96, "learning_rate": 1.0202874049027896e-05, "loss": 0.3282, "step": 9416, "task_loss": 0.32596513628959656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2824006676673889, "epoch": 7.96, "learning_rate": 1.0198647506339814e-05, "loss": 0.3203, "step": 9417, "task_loss": 0.32522502541542053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.332379549741745, "epoch": 7.96, "learning_rate": 1.0194420963651734e-05, "loss": 0.4061, "step": 9418, "task_loss": 0.32181316614151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3873562812805176, "epoch": 7.96, "learning_rate": 1.0190194420963652e-05, "loss": 0.556, "step": 9419, "task_loss": 0.2017328292131424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49662449955940247, "epoch": 7.96, "learning_rate": 1.0185967878275571e-05, "loss": 0.4462, "step": 9420, "task_loss": 0.9196640253067017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5504055023193359, "epoch": 7.96, "learning_rate": 1.018174133558749e-05, "loss": 0.3433, "step": 9421, "task_loss": 0.8239861726760864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32461249828338623, "epoch": 7.96, "learning_rate": 1.0177514792899409e-05, "loss": 0.4561, "step": 9422, "task_loss": 1.59067964553833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40270233154296875, "epoch": 7.96, "learning_rate": 1.0173288250211329e-05, "loss": 0.3899, "step": 9423, "task_loss": 1.4099159240722656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44141674041748047, "epoch": 7.97, "learning_rate": 1.0169061707523247e-05, "loss": 0.3505, "step": 9424, "task_loss": 0.36235588788986206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3399415910243988, "epoch": 7.97, "learning_rate": 1.0164835164835165e-05, "loss": 0.294, "step": 9425, "task_loss": 0.5679250359535217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19249096512794495, "epoch": 7.97, "learning_rate": 1.0160608622147085e-05, "loss": 0.3718, "step": 9426, "task_loss": 0.5124766826629639 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5217880606651306, "epoch": 7.97, "learning_rate": 1.0156382079459004e-05, "loss": 0.5107, "step": 9427, "task_loss": 0.9031076431274414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3124064803123474, "epoch": 7.97, "learning_rate": 1.015215553677092e-05, "loss": 0.3755, "step": 9428, "task_loss": 1.2427045106887817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5272301435470581, "epoch": 7.97, "learning_rate": 1.014792899408284e-05, "loss": 0.4025, "step": 9429, "task_loss": 0.8686051964759827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28804540634155273, "epoch": 7.97, "learning_rate": 1.014370245139476e-05, "loss": 0.4005, "step": 9430, "task_loss": 0.7669927477836609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24318541586399078, "epoch": 7.97, "learning_rate": 1.0139475908706678e-05, "loss": 0.3925, "step": 9431, "task_loss": 0.2363303005695343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5863572955131531, "epoch": 7.97, "learning_rate": 1.0135249366018596e-05, "loss": 0.4184, "step": 9432, "task_loss": 1.0201488733291626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4496966600418091, "epoch": 7.97, "learning_rate": 1.0131022823330516e-05, "loss": 0.478, "step": 9433, "task_loss": 0.30431491136550903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3626857399940491, "epoch": 7.97, "learning_rate": 1.0126796280642436e-05, "loss": 0.3859, "step": 9434, "task_loss": 0.6130858659744263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2506057620048523, "epoch": 7.97, "learning_rate": 1.0122569737954354e-05, "loss": 0.5043, "step": 9435, "task_loss": 0.1877564936876297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3694946765899658, "epoch": 7.98, "learning_rate": 1.0118343195266272e-05, "loss": 0.3978, "step": 9436, "task_loss": 0.7117792367935181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5303183197975159, "epoch": 7.98, "learning_rate": 1.0114116652578192e-05, "loss": 0.4806, "step": 9437, "task_loss": 1.1630799770355225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35216695070266724, "epoch": 7.98, "learning_rate": 1.0109890109890111e-05, "loss": 0.3244, "step": 9438, "task_loss": 0.6616082787513733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2138129025697708, "epoch": 7.98, "learning_rate": 1.010566356720203e-05, "loss": 0.345, "step": 9439, "task_loss": 0.21335120499134064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20888842642307281, "epoch": 7.98, "learning_rate": 1.0101437024513947e-05, "loss": 0.3096, "step": 9440, "task_loss": 0.348694771528244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31142765283584595, "epoch": 7.98, "learning_rate": 1.0097210481825867e-05, "loss": 0.2904, "step": 9441, "task_loss": 0.6710964441299438 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44097262620925903, "epoch": 7.98, "learning_rate": 1.0092983939137785e-05, "loss": 0.529, "step": 9442, "task_loss": 0.39753127098083496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3438207507133484, "epoch": 7.98, "learning_rate": 1.0088757396449705e-05, "loss": 0.348, "step": 9443, "task_loss": 0.642531156539917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38025474548339844, "epoch": 7.98, "learning_rate": 1.0084530853761623e-05, "loss": 0.4213, "step": 9444, "task_loss": 0.33351853489875793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24770861864089966, "epoch": 7.98, "learning_rate": 1.0080304311073543e-05, "loss": 0.3548, "step": 9445, "task_loss": 0.45884454250335693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33241963386535645, "epoch": 7.98, "learning_rate": 1.007607776838546e-05, "loss": 0.356, "step": 9446, "task_loss": 0.6009525656700134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9162295460700989, "epoch": 7.99, "learning_rate": 1.007185122569738e-05, "loss": 0.4755, "step": 9447, "task_loss": 0.8147220015525818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15189969539642334, "epoch": 7.99, "learning_rate": 1.0067624683009299e-05, "loss": 0.3256, "step": 9448, "task_loss": 0.32074421644210815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4850310683250427, "epoch": 7.99, "learning_rate": 1.0063398140321217e-05, "loss": 0.4105, "step": 9449, "task_loss": 0.40386563539505005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4874488115310669, "epoch": 7.99, "learning_rate": 1.0059171597633136e-05, "loss": 0.414, "step": 9450, "task_loss": 0.5610076189041138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3008481562137604, "epoch": 7.99, "learning_rate": 1.0054945054945056e-05, "loss": 0.3421, "step": 9451, "task_loss": 0.8644840121269226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2799399495124817, "epoch": 7.99, "learning_rate": 1.0050718512256974e-05, "loss": 0.3506, "step": 9452, "task_loss": 0.3240704834461212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2638999819755554, "epoch": 7.99, "learning_rate": 1.0046491969568892e-05, "loss": 0.32, "step": 9453, "task_loss": 0.24742238223552704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3064427375793457, "epoch": 7.99, "learning_rate": 1.0042265426880812e-05, "loss": 0.3862, "step": 9454, "task_loss": 1.0167747735977173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25037920475006104, "epoch": 7.99, "learning_rate": 1.0038038884192732e-05, "loss": 0.4079, "step": 9455, "task_loss": 0.9736714363098145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3613587021827698, "epoch": 7.99, "learning_rate": 1.003381234150465e-05, "loss": 0.3458, "step": 9456, "task_loss": 0.6769360899925232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2978385090827942, "epoch": 7.99, "learning_rate": 1.0029585798816568e-05, "loss": 0.4779, "step": 9457, "task_loss": 0.5593798160552979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30896925926208496, "epoch": 7.99, "learning_rate": 1.0025359256128487e-05, "loss": 0.4081, "step": 9458, "task_loss": 0.6380171775817871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.693453848361969, "epoch": 8.0, "learning_rate": 1.0021132713440407e-05, "loss": 0.3954, "step": 9459, "task_loss": 0.6205152273178101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2568035125732422, "epoch": 8.0, "learning_rate": 1.0016906170752325e-05, "loss": 0.3991, "step": 9460, "task_loss": 0.882075846195221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5914914608001709, "epoch": 8.0, "learning_rate": 1.0012679628064243e-05, "loss": 0.3545, "step": 9461, "task_loss": 0.5765284895896912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4754551649093628, "epoch": 8.0, "learning_rate": 1.0008453085376163e-05, "loss": 0.4868, "step": 9462, "task_loss": 0.3565811812877655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5446940660476685, "epoch": 8.0, "learning_rate": 1.0004226542688081e-05, "loss": 0.394, "step": 9463, "task_loss": 0.4381572902202606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30494987964630127, "epoch": 8.0, "learning_rate": 1e-05, "loss": 0.261, "step": 9464, "task_loss": 0.38234224915504456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3635275959968567, "epoch": 8.0, "learning_rate": 9.995773457311919e-06, "loss": 0.7953, "step": 9465, "task_loss": 0.5007911324501038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3265039026737213, "epoch": 8.0, "learning_rate": 9.991546914623839e-06, "loss": 0.3029, "step": 9466, "task_loss": 0.2844346761703491 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26586154103279114, "epoch": 8.0, "learning_rate": 9.987320371935757e-06, "loss": 0.2748, "step": 9467, "task_loss": 0.5997502207756042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3282037377357483, "epoch": 8.0, "learning_rate": 9.983093829247676e-06, "loss": 0.3883, "step": 9468, "task_loss": 1.3357172012329102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27700817584991455, "epoch": 8.0, "learning_rate": 9.978867286559594e-06, "loss": 0.4622, "step": 9469, "task_loss": 0.8319427371025085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5357617735862732, "epoch": 8.01, "learning_rate": 9.974640743871514e-06, "loss": 0.3959, "step": 9470, "task_loss": 1.032879114151001 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4563886523246765, "epoch": 8.01, "learning_rate": 9.970414201183432e-06, "loss": 0.3359, "step": 9471, "task_loss": 0.10449904203414917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3475956916809082, "epoch": 8.01, "learning_rate": 9.966187658495352e-06, "loss": 0.4776, "step": 9472, "task_loss": 0.39387616515159607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42351025342941284, "epoch": 8.01, "learning_rate": 9.96196111580727e-06, "loss": 0.4005, "step": 9473, "task_loss": 0.3764391839504242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4575378894805908, "epoch": 8.01, "learning_rate": 9.957734573119188e-06, "loss": 0.3436, "step": 9474, "task_loss": 0.26252058148384094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2280004620552063, "epoch": 8.01, "learning_rate": 9.953508030431108e-06, "loss": 0.3761, "step": 9475, "task_loss": 0.14729619026184082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.50696861743927, "epoch": 8.01, "learning_rate": 9.949281487743028e-06, "loss": 0.3973, "step": 9476, "task_loss": 1.009718656539917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1829715520143509, "epoch": 8.01, "learning_rate": 9.945054945054946e-06, "loss": 0.3651, "step": 9477, "task_loss": 0.14630205929279327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.456539124250412, "epoch": 8.01, "learning_rate": 9.940828402366864e-06, "loss": 0.3854, "step": 9478, "task_loss": 0.4401250183582306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2648431658744812, "epoch": 8.01, "learning_rate": 9.936601859678783e-06, "loss": 0.3288, "step": 9479, "task_loss": 0.04418956860899925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4458216428756714, "epoch": 8.01, "learning_rate": 9.932375316990703e-06, "loss": 0.4529, "step": 9480, "task_loss": 0.3048208951950073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39277467131614685, "epoch": 8.01, "learning_rate": 9.92814877430262e-06, "loss": 0.4193, "step": 9481, "task_loss": 0.4199518859386444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1766292303800583, "epoch": 8.02, "learning_rate": 9.92392223161454e-06, "loss": 0.3768, "step": 9482, "task_loss": 0.22615107893943787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47020259499549866, "epoch": 8.02, "learning_rate": 9.919695688926459e-06, "loss": 0.3335, "step": 9483, "task_loss": 0.14356809854507446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26510873436927795, "epoch": 8.02, "learning_rate": 9.915469146238379e-06, "loss": 0.2679, "step": 9484, "task_loss": 0.1624196171760559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28390875458717346, "epoch": 8.02, "learning_rate": 9.911242603550297e-06, "loss": 0.4356, "step": 9485, "task_loss": 0.4377221167087555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2791476547718048, "epoch": 8.02, "learning_rate": 9.907016060862215e-06, "loss": 0.4166, "step": 9486, "task_loss": 1.0479180812835693 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3591724634170532, "epoch": 8.02, "learning_rate": 9.902789518174134e-06, "loss": 0.4586, "step": 9487, "task_loss": 0.7510116696357727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3330943286418915, "epoch": 8.02, "learning_rate": 9.898562975486053e-06, "loss": 0.3825, "step": 9488, "task_loss": 0.8075860142707825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37684503197669983, "epoch": 8.02, "learning_rate": 9.894336432797972e-06, "loss": 0.3881, "step": 9489, "task_loss": 0.949891984462738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4541041851043701, "epoch": 8.02, "learning_rate": 9.89010989010989e-06, "loss": 0.3906, "step": 9490, "task_loss": 0.6921858787536621 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6335106492042542, "epoch": 8.02, "learning_rate": 9.88588334742181e-06, "loss": 0.4252, "step": 9491, "task_loss": 1.0319186449050903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4646127223968506, "epoch": 8.02, "learning_rate": 9.881656804733728e-06, "loss": 0.4588, "step": 9492, "task_loss": 1.4515480995178223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3685835301876068, "epoch": 8.02, "learning_rate": 9.877430262045648e-06, "loss": 0.3323, "step": 9493, "task_loss": 0.16782687604427338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.400140643119812, "epoch": 8.03, "learning_rate": 9.873203719357566e-06, "loss": 0.407, "step": 9494, "task_loss": 0.383005291223526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4261009097099304, "epoch": 8.03, "learning_rate": 9.868977176669484e-06, "loss": 0.3872, "step": 9495, "task_loss": 0.805411696434021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5084391236305237, "epoch": 8.03, "learning_rate": 9.864750633981404e-06, "loss": 0.4502, "step": 9496, "task_loss": 0.31891733407974243 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30456095933914185, "epoch": 8.03, "learning_rate": 9.860524091293323e-06, "loss": 0.3274, "step": 9497, "task_loss": 0.3608904480934143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2846469283103943, "epoch": 8.03, "learning_rate": 9.856297548605241e-06, "loss": 0.4728, "step": 9498, "task_loss": 0.7982349395751953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3022933602333069, "epoch": 8.03, "learning_rate": 9.85207100591716e-06, "loss": 0.4673, "step": 9499, "task_loss": 0.7299861907958984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40707725286483765, "epoch": 8.03, "learning_rate": 9.84784446322908e-06, "loss": 0.4163, "step": 9500, "task_loss": 0.5602826476097107 }, { "epoch": 8.03, "eval_accuracy": 0.9165544554455446, "eval_loss": 0.2582683563232422, "eval_runtime": 226.4874, "eval_samples_per_second": 111.485, "eval_steps_per_second": 0.874, "step": 9500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40723666548728943, "epoch": 8.03, "learning_rate": 9.843617920540999e-06, "loss": 0.3456, "step": 9501, "task_loss": 0.303974449634552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3229803740978241, "epoch": 8.03, "learning_rate": 9.839391377852917e-06, "loss": 0.383, "step": 9502, "task_loss": 0.8068796396255493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37747031450271606, "epoch": 8.03, "learning_rate": 9.835164835164835e-06, "loss": 0.3836, "step": 9503, "task_loss": 1.0041242837905884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39161014556884766, "epoch": 8.03, "learning_rate": 9.830938292476755e-06, "loss": 0.3756, "step": 9504, "task_loss": 0.7075904607772827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17666351795196533, "epoch": 8.03, "learning_rate": 9.826711749788675e-06, "loss": 0.358, "step": 9505, "task_loss": 0.3021080195903778 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.272285521030426, "epoch": 8.04, "learning_rate": 9.822485207100591e-06, "loss": 0.3603, "step": 9506, "task_loss": 0.40746957063674927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33424729108810425, "epoch": 8.04, "learning_rate": 9.81825866441251e-06, "loss": 0.4974, "step": 9507, "task_loss": 0.7196852564811707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49253612756729126, "epoch": 8.04, "learning_rate": 9.81403212172443e-06, "loss": 0.4067, "step": 9508, "task_loss": 0.6764450073242188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30621182918548584, "epoch": 8.04, "learning_rate": 9.809805579036348e-06, "loss": 0.3965, "step": 9509, "task_loss": 0.6441108584403992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5450552105903625, "epoch": 8.04, "learning_rate": 9.805579036348266e-06, "loss": 0.4649, "step": 9510, "task_loss": 0.934654951095581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34476178884506226, "epoch": 8.04, "learning_rate": 9.801352493660186e-06, "loss": 0.3365, "step": 9511, "task_loss": 0.5674360990524292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3293823003768921, "epoch": 8.04, "learning_rate": 9.797125950972106e-06, "loss": 0.3946, "step": 9512, "task_loss": 0.36159422993659973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32382678985595703, "epoch": 8.04, "learning_rate": 9.792899408284024e-06, "loss": 0.3184, "step": 9513, "task_loss": 1.3337057828903198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21925796568393707, "epoch": 8.04, "learning_rate": 9.788672865595944e-06, "loss": 0.343, "step": 9514, "task_loss": 0.23473528027534485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29959332942962646, "epoch": 8.04, "learning_rate": 9.784446322907862e-06, "loss": 0.3779, "step": 9515, "task_loss": 1.0600448846817017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2582482695579529, "epoch": 8.04, "learning_rate": 9.78021978021978e-06, "loss": 0.3906, "step": 9516, "task_loss": 0.4543503522872925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23099832236766815, "epoch": 8.04, "learning_rate": 9.7759932375317e-06, "loss": 0.3231, "step": 9517, "task_loss": 0.4425613284111023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4333673119544983, "epoch": 8.05, "learning_rate": 9.77176669484362e-06, "loss": 0.3638, "step": 9518, "task_loss": 0.5323060750961304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38231539726257324, "epoch": 8.05, "learning_rate": 9.767540152155537e-06, "loss": 0.3196, "step": 9519, "task_loss": 0.6407788395881653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4929042160511017, "epoch": 8.05, "learning_rate": 9.763313609467455e-06, "loss": 0.4189, "step": 9520, "task_loss": 0.49906104803085327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3395448923110962, "epoch": 8.05, "learning_rate": 9.759087066779375e-06, "loss": 0.4829, "step": 9521, "task_loss": 0.20274889469146729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37314921617507935, "epoch": 8.05, "learning_rate": 9.754860524091295e-06, "loss": 0.5312, "step": 9522, "task_loss": 0.3446331322193146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25583717226982117, "epoch": 8.05, "learning_rate": 9.750633981403213e-06, "loss": 0.3544, "step": 9523, "task_loss": 0.1991509646177292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23117810487747192, "epoch": 8.05, "learning_rate": 9.746407438715131e-06, "loss": 0.3681, "step": 9524, "task_loss": 0.18167535960674286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24815919995307922, "epoch": 8.05, "learning_rate": 9.74218089602705e-06, "loss": 0.3838, "step": 9525, "task_loss": 0.41123390197753906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21954123675823212, "epoch": 8.05, "learning_rate": 9.73795435333897e-06, "loss": 0.2999, "step": 9526, "task_loss": 0.14730186760425568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42896896600723267, "epoch": 8.05, "learning_rate": 9.733727810650887e-06, "loss": 0.3161, "step": 9527, "task_loss": 1.0610079765319824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4465399384498596, "epoch": 8.05, "learning_rate": 9.729501267962806e-06, "loss": 0.3657, "step": 9528, "task_loss": 0.5230749845504761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6194783449172974, "epoch": 8.05, "learning_rate": 9.725274725274726e-06, "loss": 0.4487, "step": 9529, "task_loss": 0.8953304290771484 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4146515727043152, "epoch": 8.06, "learning_rate": 9.721048182586644e-06, "loss": 0.4122, "step": 9530, "task_loss": 0.6930051445960999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5738439559936523, "epoch": 8.06, "learning_rate": 9.716821639898562e-06, "loss": 0.3722, "step": 9531, "task_loss": 0.5267540812492371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3677220940589905, "epoch": 8.06, "learning_rate": 9.712595097210482e-06, "loss": 0.2843, "step": 9532, "task_loss": 0.3634708523750305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4567508101463318, "epoch": 8.06, "learning_rate": 9.708368554522402e-06, "loss": 0.3547, "step": 9533, "task_loss": 0.6932446360588074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4244416654109955, "epoch": 8.06, "learning_rate": 9.70414201183432e-06, "loss": 0.4385, "step": 9534, "task_loss": 0.5224007964134216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4627702832221985, "epoch": 8.06, "learning_rate": 9.699915469146238e-06, "loss": 0.3477, "step": 9535, "task_loss": 0.3209187090396881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3206583559513092, "epoch": 8.06, "learning_rate": 9.695688926458158e-06, "loss": 0.3176, "step": 9536, "task_loss": 0.09001386910676956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17339655756950378, "epoch": 8.06, "learning_rate": 9.691462383770077e-06, "loss": 0.3186, "step": 9537, "task_loss": 0.2191508412361145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5378230214118958, "epoch": 8.06, "learning_rate": 9.687235841081995e-06, "loss": 0.3741, "step": 9538, "task_loss": 0.908126950263977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3767419159412384, "epoch": 8.06, "learning_rate": 9.683009298393913e-06, "loss": 0.3763, "step": 9539, "task_loss": 0.590830385684967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3942040205001831, "epoch": 8.06, "learning_rate": 9.678782755705833e-06, "loss": 0.4315, "step": 9540, "task_loss": 0.3798633813858032 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39076346158981323, "epoch": 8.07, "learning_rate": 9.674556213017751e-06, "loss": 0.4078, "step": 9541, "task_loss": 0.6101036071777344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21464087069034576, "epoch": 8.07, "learning_rate": 9.670329670329671e-06, "loss": 0.3844, "step": 9542, "task_loss": 1.1720894575119019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3350306749343872, "epoch": 8.07, "learning_rate": 9.66610312764159e-06, "loss": 0.3873, "step": 9543, "task_loss": 0.4297977387905121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47340577840805054, "epoch": 8.07, "learning_rate": 9.661876584953509e-06, "loss": 0.3778, "step": 9544, "task_loss": 0.25794732570648193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5419970750808716, "epoch": 8.07, "learning_rate": 9.657650042265427e-06, "loss": 0.3778, "step": 9545, "task_loss": 0.5469459891319275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45987582206726074, "epoch": 8.07, "learning_rate": 9.653423499577347e-06, "loss": 0.3318, "step": 9546, "task_loss": 1.442956805229187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4366358518600464, "epoch": 8.07, "learning_rate": 9.649196956889266e-06, "loss": 0.5575, "step": 9547, "task_loss": 0.6381012201309204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4083651900291443, "epoch": 8.07, "learning_rate": 9.644970414201183e-06, "loss": 0.4026, "step": 9548, "task_loss": 0.4918254315853119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6344305276870728, "epoch": 8.07, "learning_rate": 9.640743871513102e-06, "loss": 0.4557, "step": 9549, "task_loss": 0.7844622135162354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2742043137550354, "epoch": 8.07, "learning_rate": 9.636517328825022e-06, "loss": 0.3116, "step": 9550, "task_loss": 0.43593645095825195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.316173791885376, "epoch": 8.07, "learning_rate": 9.632290786136942e-06, "loss": 0.3362, "step": 9551, "task_loss": 0.2711165249347687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37108156085014343, "epoch": 8.07, "learning_rate": 9.628064243448858e-06, "loss": 0.3363, "step": 9552, "task_loss": 0.6358299255371094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2919468879699707, "epoch": 8.08, "learning_rate": 9.623837700760778e-06, "loss": 0.3776, "step": 9553, "task_loss": 0.7901868224143982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5201333165168762, "epoch": 8.08, "learning_rate": 9.619611158072698e-06, "loss": 0.4396, "step": 9554, "task_loss": 0.39068761467933655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3648889660835266, "epoch": 8.08, "learning_rate": 9.615384615384616e-06, "loss": 0.4203, "step": 9555, "task_loss": 1.0932284593582153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4836452603340149, "epoch": 8.08, "learning_rate": 9.611158072696534e-06, "loss": 0.389, "step": 9556, "task_loss": 0.7916218042373657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27200859785079956, "epoch": 8.08, "learning_rate": 9.606931530008453e-06, "loss": 0.2992, "step": 9557, "task_loss": 0.23351293802261353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2762584686279297, "epoch": 8.08, "learning_rate": 9.602704987320373e-06, "loss": 0.344, "step": 9558, "task_loss": 0.2612742781639099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46028584241867065, "epoch": 8.08, "learning_rate": 9.598478444632291e-06, "loss": 0.4589, "step": 9559, "task_loss": 0.4408276677131653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2632410526275635, "epoch": 8.08, "learning_rate": 9.59425190194421e-06, "loss": 0.2922, "step": 9560, "task_loss": 0.4985095262527466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.492450475692749, "epoch": 8.08, "learning_rate": 9.590025359256129e-06, "loss": 0.3553, "step": 9561, "task_loss": 0.42737436294555664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2609958350658417, "epoch": 8.08, "learning_rate": 9.585798816568047e-06, "loss": 0.4056, "step": 9562, "task_loss": 0.6591688990592957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23907506465911865, "epoch": 8.08, "learning_rate": 9.581572273879967e-06, "loss": 0.3425, "step": 9563, "task_loss": 0.7152498960494995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24413537979125977, "epoch": 8.08, "learning_rate": 9.577345731191885e-06, "loss": 0.3215, "step": 9564, "task_loss": 0.31466352939605713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5873289704322815, "epoch": 8.09, "learning_rate": 9.573119188503805e-06, "loss": 0.5086, "step": 9565, "task_loss": 0.6502830982208252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15625923871994019, "epoch": 8.09, "learning_rate": 9.568892645815723e-06, "loss": 0.4039, "step": 9566, "task_loss": 0.03125901147723198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30555737018585205, "epoch": 8.09, "learning_rate": 9.564666103127642e-06, "loss": 0.3101, "step": 9567, "task_loss": 0.6276337504386902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6364552974700928, "epoch": 8.09, "learning_rate": 9.56043956043956e-06, "loss": 0.426, "step": 9568, "task_loss": 1.0174585580825806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4175097644329071, "epoch": 8.09, "learning_rate": 9.55621301775148e-06, "loss": 0.4443, "step": 9569, "task_loss": 1.0049359798431396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6983851790428162, "epoch": 8.09, "learning_rate": 9.551986475063398e-06, "loss": 0.479, "step": 9570, "task_loss": 0.677760660648346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40620875358581543, "epoch": 8.09, "learning_rate": 9.547759932375318e-06, "loss": 0.2764, "step": 9571, "task_loss": 0.9887720942497253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3389171063899994, "epoch": 8.09, "learning_rate": 9.543533389687238e-06, "loss": 0.2897, "step": 9572, "task_loss": 0.5915806293487549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7092840671539307, "epoch": 8.09, "learning_rate": 9.539306846999154e-06, "loss": 0.323, "step": 9573, "task_loss": 1.0367742776870728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3712989389896393, "epoch": 8.09, "learning_rate": 9.535080304311074e-06, "loss": 0.6249, "step": 9574, "task_loss": 0.333400160074234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40600138902664185, "epoch": 8.09, "learning_rate": 9.530853761622994e-06, "loss": 0.3419, "step": 9575, "task_loss": 0.649448812007904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27573418617248535, "epoch": 8.09, "learning_rate": 9.526627218934912e-06, "loss": 0.3511, "step": 9576, "task_loss": 1.095445156097412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30102992057800293, "epoch": 8.1, "learning_rate": 9.52240067624683e-06, "loss": 0.3486, "step": 9577, "task_loss": 0.22039470076560974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3979472219944, "epoch": 8.1, "learning_rate": 9.51817413355875e-06, "loss": 0.3315, "step": 9578, "task_loss": 0.41883575916290283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6579056978225708, "epoch": 8.1, "learning_rate": 9.513947590870669e-06, "loss": 0.4493, "step": 9579, "task_loss": 0.9873151183128357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.50331050157547, "epoch": 8.1, "learning_rate": 9.509721048182587e-06, "loss": 0.2954, "step": 9580, "task_loss": 0.2561885118484497 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17993547022342682, "epoch": 8.1, "learning_rate": 9.505494505494505e-06, "loss": 0.4296, "step": 9581, "task_loss": 0.47484517097473145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1356126070022583, "epoch": 8.1, "learning_rate": 9.501267962806425e-06, "loss": 0.2887, "step": 9582, "task_loss": 0.5511762499809265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38288500905036926, "epoch": 8.1, "learning_rate": 9.497041420118345e-06, "loss": 0.3238, "step": 9583, "task_loss": 0.6213030815124512 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6185067892074585, "epoch": 8.1, "learning_rate": 9.492814877430263e-06, "loss": 0.484, "step": 9584, "task_loss": 0.45754849910736084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24267931282520294, "epoch": 8.1, "learning_rate": 9.48858833474218e-06, "loss": 0.3728, "step": 9585, "task_loss": 0.8542752861976624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25541970133781433, "epoch": 8.1, "learning_rate": 9.4843617920541e-06, "loss": 0.3445, "step": 9586, "task_loss": 0.3271564245223999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42707839608192444, "epoch": 8.1, "learning_rate": 9.480135249366019e-06, "loss": 0.433, "step": 9587, "task_loss": 0.9447238445281982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26518845558166504, "epoch": 8.1, "learning_rate": 9.475908706677938e-06, "loss": 0.3891, "step": 9588, "task_loss": 0.30231887102127075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2763517498970032, "epoch": 8.11, "learning_rate": 9.471682163989856e-06, "loss": 0.3606, "step": 9589, "task_loss": 0.38724765181541443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3539882004261017, "epoch": 8.11, "learning_rate": 9.467455621301776e-06, "loss": 0.4567, "step": 9590, "task_loss": 0.6587515473365784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3378356993198395, "epoch": 8.11, "learning_rate": 9.463229078613694e-06, "loss": 0.4536, "step": 9591, "task_loss": 0.8763216137886047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35996103286743164, "epoch": 8.11, "learning_rate": 9.459002535925614e-06, "loss": 0.3752, "step": 9592, "task_loss": 1.6664866209030151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3141550123691559, "epoch": 8.11, "learning_rate": 9.454775993237532e-06, "loss": 0.4192, "step": 9593, "task_loss": 0.5907471179962158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.316249817609787, "epoch": 8.11, "learning_rate": 9.45054945054945e-06, "loss": 0.3713, "step": 9594, "task_loss": 0.2997492253780365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5545806288719177, "epoch": 8.11, "learning_rate": 9.44632290786137e-06, "loss": 0.4512, "step": 9595, "task_loss": 1.0716569423675537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38335737586021423, "epoch": 8.11, "learning_rate": 9.44209636517329e-06, "loss": 0.3363, "step": 9596, "task_loss": 0.36720141768455505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3329651951789856, "epoch": 8.11, "learning_rate": 9.437869822485207e-06, "loss": 0.2881, "step": 9597, "task_loss": 0.6670209169387817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5699474811553955, "epoch": 8.11, "learning_rate": 9.433643279797126e-06, "loss": 0.4345, "step": 9598, "task_loss": 0.3198203146457672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5423785448074341, "epoch": 8.11, "learning_rate": 9.429416737109045e-06, "loss": 0.5017, "step": 9599, "task_loss": 1.515604019165039 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34926584362983704, "epoch": 8.11, "learning_rate": 9.425190194420965e-06, "loss": 0.4044, "step": 9600, "task_loss": 1.0147638320922852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4368707835674286, "epoch": 8.12, "learning_rate": 9.420963651732883e-06, "loss": 0.3993, "step": 9601, "task_loss": 1.1315109729766846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36856797337532043, "epoch": 8.12, "learning_rate": 9.416737109044801e-06, "loss": 0.3545, "step": 9602, "task_loss": 0.6522079110145569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3776322901248932, "epoch": 8.12, "learning_rate": 9.41251056635672e-06, "loss": 0.5057, "step": 9603, "task_loss": 1.1521193981170654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24178169667720795, "epoch": 8.12, "learning_rate": 9.40828402366864e-06, "loss": 0.342, "step": 9604, "task_loss": 0.25959545373916626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5928804278373718, "epoch": 8.12, "learning_rate": 9.404057480980559e-06, "loss": 0.5401, "step": 9605, "task_loss": 1.4422557353973389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34911155700683594, "epoch": 8.12, "learning_rate": 9.399830938292477e-06, "loss": 0.3286, "step": 9606, "task_loss": 0.33856016397476196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5973302721977234, "epoch": 8.12, "learning_rate": 9.395604395604396e-06, "loss": 0.4587, "step": 9607, "task_loss": 0.3795664608478546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3140881061553955, "epoch": 8.12, "learning_rate": 9.391377852916314e-06, "loss": 0.3214, "step": 9608, "task_loss": 0.20383039116859436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2899821102619171, "epoch": 8.12, "learning_rate": 9.387151310228234e-06, "loss": 0.3746, "step": 9609, "task_loss": 0.1717749387025833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5864571332931519, "epoch": 8.12, "learning_rate": 9.382924767540152e-06, "loss": 0.3729, "step": 9610, "task_loss": 0.9384562969207764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33046454191207886, "epoch": 8.12, "learning_rate": 9.378698224852072e-06, "loss": 0.2951, "step": 9611, "task_loss": 0.7030954957008362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29536837339401245, "epoch": 8.13, "learning_rate": 9.37447168216399e-06, "loss": 0.5172, "step": 9612, "task_loss": 0.6117832660675049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2744894027709961, "epoch": 8.13, "learning_rate": 9.37024513947591e-06, "loss": 0.3147, "step": 9613, "task_loss": 0.4742885231971741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2914201021194458, "epoch": 8.13, "learning_rate": 9.366018596787828e-06, "loss": 0.3221, "step": 9614, "task_loss": 1.2450988292694092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3952803313732147, "epoch": 8.13, "learning_rate": 9.361792054099748e-06, "loss": 0.3611, "step": 9615, "task_loss": 0.5938776135444641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28048014640808105, "epoch": 8.13, "learning_rate": 9.357565511411666e-06, "loss": 0.3374, "step": 9616, "task_loss": 1.7317402362823486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.380979061126709, "epoch": 8.13, "learning_rate": 9.353338968723585e-06, "loss": 0.4017, "step": 9617, "task_loss": 0.19807791709899902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3415282368659973, "epoch": 8.13, "learning_rate": 9.349112426035503e-06, "loss": 0.2835, "step": 9618, "task_loss": 0.6596552729606628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3305451273918152, "epoch": 8.13, "learning_rate": 9.344885883347421e-06, "loss": 0.4564, "step": 9619, "task_loss": 1.1519514322280884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6515567898750305, "epoch": 8.13, "learning_rate": 9.340659340659341e-06, "loss": 0.3777, "step": 9620, "task_loss": 0.9401289224624634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24718138575553894, "epoch": 8.13, "learning_rate": 9.336432797971261e-06, "loss": 0.3209, "step": 9621, "task_loss": 1.3335468769073486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4232349395751953, "epoch": 8.13, "learning_rate": 9.332206255283179e-06, "loss": 0.4646, "step": 9622, "task_loss": 0.727961540222168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45962831377983093, "epoch": 8.13, "learning_rate": 9.327979712595097e-06, "loss": 0.4045, "step": 9623, "task_loss": 0.4596060812473297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3557490110397339, "epoch": 8.14, "learning_rate": 9.323753169907017e-06, "loss": 0.3699, "step": 9624, "task_loss": 1.2663525342941284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2863536477088928, "epoch": 8.14, "learning_rate": 9.319526627218936e-06, "loss": 0.3268, "step": 9625, "task_loss": 0.09632904082536697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22954314947128296, "epoch": 8.14, "learning_rate": 9.315300084530853e-06, "loss": 0.2577, "step": 9626, "task_loss": 0.15313583612442017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3341369032859802, "epoch": 8.14, "learning_rate": 9.311073541842773e-06, "loss": 0.4661, "step": 9627, "task_loss": 0.893329381942749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26209360361099243, "epoch": 8.14, "learning_rate": 9.306846999154692e-06, "loss": 0.405, "step": 9628, "task_loss": 0.04880192503333092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.528457760810852, "epoch": 8.14, "learning_rate": 9.30262045646661e-06, "loss": 0.4759, "step": 9629, "task_loss": 1.0006113052368164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18417946994304657, "epoch": 8.14, "learning_rate": 9.298393913778528e-06, "loss": 0.3162, "step": 9630, "task_loss": 0.18520507216453552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.270965576171875, "epoch": 8.14, "learning_rate": 9.294167371090448e-06, "loss": 0.2886, "step": 9631, "task_loss": 0.11668547242879868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32560622692108154, "epoch": 8.14, "learning_rate": 9.289940828402368e-06, "loss": 0.3888, "step": 9632, "task_loss": 0.3255392909049988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.09497303515672684, "epoch": 8.14, "learning_rate": 9.285714285714286e-06, "loss": 0.348, "step": 9633, "task_loss": 0.03355012834072113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4106968939304352, "epoch": 8.14, "learning_rate": 9.281487743026206e-06, "loss": 0.4313, "step": 9634, "task_loss": 1.1803704500198364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5191466808319092, "epoch": 8.14, "learning_rate": 9.277261200338124e-06, "loss": 0.3892, "step": 9635, "task_loss": 0.6688359975814819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6053226590156555, "epoch": 8.15, "learning_rate": 9.273034657650043e-06, "loss": 0.5255, "step": 9636, "task_loss": 0.32775962352752686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39070868492126465, "epoch": 8.15, "learning_rate": 9.268808114961961e-06, "loss": 0.4261, "step": 9637, "task_loss": 0.3421628177165985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5107582807540894, "epoch": 8.15, "learning_rate": 9.264581572273881e-06, "loss": 0.4467, "step": 9638, "task_loss": 0.6848765015602112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16719526052474976, "epoch": 8.15, "learning_rate": 9.2603550295858e-06, "loss": 0.3027, "step": 9639, "task_loss": 0.02444460801780224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3925929665565491, "epoch": 8.15, "learning_rate": 9.256128486897717e-06, "loss": 0.4696, "step": 9640, "task_loss": 0.3108866512775421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18128415942192078, "epoch": 8.15, "learning_rate": 9.251901944209637e-06, "loss": 0.3648, "step": 9641, "task_loss": 0.47694361209869385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36158427596092224, "epoch": 8.15, "learning_rate": 9.247675401521557e-06, "loss": 0.4199, "step": 9642, "task_loss": 0.527518630027771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.613684892654419, "epoch": 8.15, "learning_rate": 9.243448858833475e-06, "loss": 0.3951, "step": 9643, "task_loss": 1.2893481254577637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6827993988990784, "epoch": 8.15, "learning_rate": 9.239222316145393e-06, "loss": 0.3756, "step": 9644, "task_loss": 0.5007761716842651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33941271901130676, "epoch": 8.15, "learning_rate": 9.234995773457313e-06, "loss": 0.4091, "step": 9645, "task_loss": 0.48540809750556946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31250306963920593, "epoch": 8.15, "learning_rate": 9.230769230769232e-06, "loss": 0.3187, "step": 9646, "task_loss": 0.24228867888450623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33825749158859253, "epoch": 8.15, "learning_rate": 9.22654268808115e-06, "loss": 0.5415, "step": 9647, "task_loss": 1.082698106765747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3332681953907013, "epoch": 8.16, "learning_rate": 9.222316145393068e-06, "loss": 0.3263, "step": 9648, "task_loss": 1.0833981037139893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6021872758865356, "epoch": 8.16, "learning_rate": 9.218089602704988e-06, "loss": 0.5312, "step": 9649, "task_loss": 0.14616775512695312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.344882994890213, "epoch": 8.16, "learning_rate": 9.213863060016908e-06, "loss": 0.4013, "step": 9650, "task_loss": 0.4697834849357605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3713876008987427, "epoch": 8.16, "learning_rate": 9.209636517328824e-06, "loss": 0.3729, "step": 9651, "task_loss": 0.9847646951675415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5445247888565063, "epoch": 8.16, "learning_rate": 9.205409974640744e-06, "loss": 0.34, "step": 9652, "task_loss": 0.3472640812397003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31654882431030273, "epoch": 8.16, "learning_rate": 9.201183431952664e-06, "loss": 0.4552, "step": 9653, "task_loss": 0.35890087485313416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4858705401420593, "epoch": 8.16, "learning_rate": 9.196956889264582e-06, "loss": 0.3702, "step": 9654, "task_loss": 0.8788583874702454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27804529666900635, "epoch": 8.16, "learning_rate": 9.1927303465765e-06, "loss": 0.3869, "step": 9655, "task_loss": 0.6036635041236877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5971202850341797, "epoch": 8.16, "learning_rate": 9.18850380388842e-06, "loss": 0.436, "step": 9656, "task_loss": 0.9230960011482239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47867780923843384, "epoch": 8.16, "learning_rate": 9.18427726120034e-06, "loss": 0.3747, "step": 9657, "task_loss": 1.0159308910369873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6197766661643982, "epoch": 8.16, "learning_rate": 9.180050718512257e-06, "loss": 0.3975, "step": 9658, "task_loss": 0.32422152161598206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2516373097896576, "epoch": 8.16, "learning_rate": 9.175824175824175e-06, "loss": 0.4312, "step": 9659, "task_loss": 0.6875790953636169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28631240129470825, "epoch": 8.17, "learning_rate": 9.171597633136095e-06, "loss": 0.3234, "step": 9660, "task_loss": 1.2962371110916138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38545215129852295, "epoch": 8.17, "learning_rate": 9.167371090448013e-06, "loss": 0.3394, "step": 9661, "task_loss": 0.37363725900650024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3250458240509033, "epoch": 8.17, "learning_rate": 9.163144547759933e-06, "loss": 0.2828, "step": 9662, "task_loss": 0.7471491098403931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1687881499528885, "epoch": 8.17, "learning_rate": 9.158918005071853e-06, "loss": 0.2651, "step": 9663, "task_loss": 0.0961441919207573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.299958199262619, "epoch": 8.17, "learning_rate": 9.15469146238377e-06, "loss": 0.3634, "step": 9664, "task_loss": 0.8547005653381348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5923066139221191, "epoch": 8.17, "learning_rate": 9.150464919695689e-06, "loss": 0.4031, "step": 9665, "task_loss": 0.9815897345542908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4318762421607971, "epoch": 8.17, "learning_rate": 9.146238377007608e-06, "loss": 0.5062, "step": 9666, "task_loss": 0.6969980001449585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35791635513305664, "epoch": 8.17, "learning_rate": 9.142011834319528e-06, "loss": 0.3011, "step": 9667, "task_loss": 0.42164865136146545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23975449800491333, "epoch": 8.17, "learning_rate": 9.137785291631446e-06, "loss": 0.2788, "step": 9668, "task_loss": 0.5259520411491394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3312453031539917, "epoch": 8.17, "learning_rate": 9.133558748943364e-06, "loss": 0.3475, "step": 9669, "task_loss": 0.928041934967041 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.319538950920105, "epoch": 8.17, "learning_rate": 9.129332206255284e-06, "loss": 0.35, "step": 9670, "task_loss": 0.7993912696838379 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18613344430923462, "epoch": 8.17, "learning_rate": 9.125105663567204e-06, "loss": 0.3406, "step": 9671, "task_loss": 0.4923597574234009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6443765759468079, "epoch": 8.18, "learning_rate": 9.12087912087912e-06, "loss": 0.432, "step": 9672, "task_loss": 1.3538999557495117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30873745679855347, "epoch": 8.18, "learning_rate": 9.11665257819104e-06, "loss": 0.3048, "step": 9673, "task_loss": 0.7474343180656433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32002562284469604, "epoch": 8.18, "learning_rate": 9.11242603550296e-06, "loss": 0.2717, "step": 9674, "task_loss": 0.9239750504493713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32411304116249084, "epoch": 8.18, "learning_rate": 9.108199492814878e-06, "loss": 0.2731, "step": 9675, "task_loss": 0.2891104817390442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31777292490005493, "epoch": 8.18, "learning_rate": 9.103972950126796e-06, "loss": 0.4542, "step": 9676, "task_loss": 1.070147156715393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34139904379844666, "epoch": 8.18, "learning_rate": 9.099746407438715e-06, "loss": 0.3966, "step": 9677, "task_loss": 0.21527938544750214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3053198456764221, "epoch": 8.18, "learning_rate": 9.095519864750635e-06, "loss": 0.4166, "step": 9678, "task_loss": 0.9368047714233398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23771579563617706, "epoch": 8.18, "learning_rate": 9.091293322062553e-06, "loss": 0.4659, "step": 9679, "task_loss": 0.3267645239830017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26075536012649536, "epoch": 8.18, "learning_rate": 9.087066779374471e-06, "loss": 0.2942, "step": 9680, "task_loss": 0.0958411917090416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42152759432792664, "epoch": 8.18, "learning_rate": 9.082840236686391e-06, "loss": 0.3547, "step": 9681, "task_loss": 0.39244091510772705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3182089030742645, "epoch": 8.18, "learning_rate": 9.07861369399831e-06, "loss": 0.4266, "step": 9682, "task_loss": 0.2878417670726776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33279967308044434, "epoch": 8.19, "learning_rate": 9.074387151310229e-06, "loss": 0.4432, "step": 9683, "task_loss": 0.1174153983592987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40457475185394287, "epoch": 8.19, "learning_rate": 9.070160608622147e-06, "loss": 0.3944, "step": 9684, "task_loss": 0.8658977746963501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29502081871032715, "epoch": 8.19, "learning_rate": 9.065934065934067e-06, "loss": 0.2998, "step": 9685, "task_loss": 1.1563267707824707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4002039134502411, "epoch": 8.19, "learning_rate": 9.061707523245985e-06, "loss": 0.3236, "step": 9686, "task_loss": 1.067454218864441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4502770006656647, "epoch": 8.19, "learning_rate": 9.057480980557904e-06, "loss": 0.414, "step": 9687, "task_loss": 0.9178832173347473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1718170940876007, "epoch": 8.19, "learning_rate": 9.053254437869822e-06, "loss": 0.4125, "step": 9688, "task_loss": 0.5020856857299805 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18060216307640076, "epoch": 8.19, "learning_rate": 9.049027895181742e-06, "loss": 0.4159, "step": 9689, "task_loss": 0.030078237876296043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27984631061553955, "epoch": 8.19, "learning_rate": 9.04480135249366e-06, "loss": 0.4477, "step": 9690, "task_loss": 0.9567302465438843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27652040123939514, "epoch": 8.19, "learning_rate": 9.04057480980558e-06, "loss": 0.3729, "step": 9691, "task_loss": 0.6109839081764221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5897128582000732, "epoch": 8.19, "learning_rate": 9.0363482671175e-06, "loss": 0.549, "step": 9692, "task_loss": 0.9462595582008362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24445638060569763, "epoch": 8.19, "learning_rate": 9.032121724429416e-06, "loss": 0.3746, "step": 9693, "task_loss": 0.09894224256277084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3792136311531067, "epoch": 8.19, "learning_rate": 9.027895181741336e-06, "loss": 0.3604, "step": 9694, "task_loss": 0.9250402450561523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2336384654045105, "epoch": 8.2, "learning_rate": 9.023668639053255e-06, "loss": 0.321, "step": 9695, "task_loss": 0.8424035906791687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5347297191619873, "epoch": 8.2, "learning_rate": 9.019442096365173e-06, "loss": 0.4832, "step": 9696, "task_loss": 0.27140286564826965 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2559162378311157, "epoch": 8.2, "learning_rate": 9.015215553677092e-06, "loss": 0.3016, "step": 9697, "task_loss": 0.12297423928976059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1972401887178421, "epoch": 8.2, "learning_rate": 9.010989010989011e-06, "loss": 0.3652, "step": 9698, "task_loss": 0.7072030901908875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2507309019565582, "epoch": 8.2, "learning_rate": 9.006762468300931e-06, "loss": 0.3587, "step": 9699, "task_loss": 0.12716975808143616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35743248462677, "epoch": 8.2, "learning_rate": 9.002535925612849e-06, "loss": 0.3417, "step": 9700, "task_loss": 0.4626259207725525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2359066754579544, "epoch": 8.2, "learning_rate": 8.998309382924767e-06, "loss": 0.2987, "step": 9701, "task_loss": 0.16904082894325256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2418484091758728, "epoch": 8.2, "learning_rate": 8.994082840236687e-06, "loss": 0.3256, "step": 9702, "task_loss": 0.4192400872707367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17623473703861237, "epoch": 8.2, "learning_rate": 8.989856297548607e-06, "loss": 0.3374, "step": 9703, "task_loss": 0.8667232394218445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4268370270729065, "epoch": 8.2, "learning_rate": 8.985629754860525e-06, "loss": 0.406, "step": 9704, "task_loss": 1.2267112731933594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4142894744873047, "epoch": 8.2, "learning_rate": 8.981403212172443e-06, "loss": 0.4862, "step": 9705, "task_loss": 0.9634127616882324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4781948924064636, "epoch": 8.2, "learning_rate": 8.977176669484362e-06, "loss": 0.5278, "step": 9706, "task_loss": 0.8409520387649536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31174859404563904, "epoch": 8.21, "learning_rate": 8.97295012679628e-06, "loss": 0.2603, "step": 9707, "task_loss": 0.25963011384010315 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5368709564208984, "epoch": 8.21, "learning_rate": 8.9687235841082e-06, "loss": 0.4164, "step": 9708, "task_loss": 1.1820242404937744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35164427757263184, "epoch": 8.21, "learning_rate": 8.964497041420118e-06, "loss": 0.4737, "step": 9709, "task_loss": 0.6053301692008972 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22200173139572144, "epoch": 8.21, "learning_rate": 8.960270498732038e-06, "loss": 0.2778, "step": 9710, "task_loss": 0.1624266505241394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2969588339328766, "epoch": 8.21, "learning_rate": 8.956043956043956e-06, "loss": 0.3573, "step": 9711, "task_loss": 0.5570029616355896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4154408574104309, "epoch": 8.21, "learning_rate": 8.951817413355876e-06, "loss": 0.4129, "step": 9712, "task_loss": 0.13733650743961334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4138985574245453, "epoch": 8.21, "learning_rate": 8.947590870667794e-06, "loss": 0.4428, "step": 9713, "task_loss": 0.4884899854660034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3642539381980896, "epoch": 8.21, "learning_rate": 8.943364327979714e-06, "loss": 0.3472, "step": 9714, "task_loss": 0.6372573375701904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2805880606174469, "epoch": 8.21, "learning_rate": 8.939137785291632e-06, "loss": 0.379, "step": 9715, "task_loss": 0.6021783947944641 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20688527822494507, "epoch": 8.21, "learning_rate": 8.934911242603551e-06, "loss": 0.3272, "step": 9716, "task_loss": 0.31165313720703125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30859261751174927, "epoch": 8.21, "learning_rate": 8.93068469991547e-06, "loss": 0.3735, "step": 9717, "task_loss": 0.10539247840642929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4432902932167053, "epoch": 8.21, "learning_rate": 8.926458157227387e-06, "loss": 0.3591, "step": 9718, "task_loss": 0.9530718922615051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.678508460521698, "epoch": 8.22, "learning_rate": 8.922231614539307e-06, "loss": 0.462, "step": 9719, "task_loss": 0.8696460723876953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8330650925636292, "epoch": 8.22, "learning_rate": 8.918005071851227e-06, "loss": 0.495, "step": 9720, "task_loss": 0.858477771282196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31208711862564087, "epoch": 8.22, "learning_rate": 8.913778529163145e-06, "loss": 0.3016, "step": 9721, "task_loss": 1.2879819869995117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40455615520477295, "epoch": 8.22, "learning_rate": 8.909551986475063e-06, "loss": 0.4065, "step": 9722, "task_loss": 0.3742080330848694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5005300641059875, "epoch": 8.22, "learning_rate": 8.905325443786983e-06, "loss": 0.3483, "step": 9723, "task_loss": 0.8587540984153748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20778794586658478, "epoch": 8.22, "learning_rate": 8.901098901098902e-06, "loss": 0.3045, "step": 9724, "task_loss": 0.11371275782585144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23917904496192932, "epoch": 8.22, "learning_rate": 8.89687235841082e-06, "loss": 0.3445, "step": 9725, "task_loss": 0.12882114946842194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33064234256744385, "epoch": 8.22, "learning_rate": 8.892645815722739e-06, "loss": 0.3554, "step": 9726, "task_loss": 0.4286997616291046 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.538514256477356, "epoch": 8.22, "learning_rate": 8.888419273034658e-06, "loss": 0.4464, "step": 9727, "task_loss": 0.7297476530075073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2555297017097473, "epoch": 8.22, "learning_rate": 8.884192730346576e-06, "loss": 0.3279, "step": 9728, "task_loss": 0.6955799460411072 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3818511366844177, "epoch": 8.22, "learning_rate": 8.879966187658496e-06, "loss": 0.4628, "step": 9729, "task_loss": 1.0754139423370361 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24143879115581512, "epoch": 8.22, "learning_rate": 8.875739644970414e-06, "loss": 0.2392, "step": 9730, "task_loss": 0.2514159679412842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19504114985466003, "epoch": 8.23, "learning_rate": 8.871513102282334e-06, "loss": 0.2869, "step": 9731, "task_loss": 0.8008387684822083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22287370264530182, "epoch": 8.23, "learning_rate": 8.867286559594252e-06, "loss": 0.3264, "step": 9732, "task_loss": 0.4593740403652191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3432191014289856, "epoch": 8.23, "learning_rate": 8.863060016906172e-06, "loss": 0.4326, "step": 9733, "task_loss": 0.6860237717628479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4067971110343933, "epoch": 8.23, "learning_rate": 8.85883347421809e-06, "loss": 0.322, "step": 9734, "task_loss": 0.3725316822528839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.260647714138031, "epoch": 8.23, "learning_rate": 8.85460693153001e-06, "loss": 0.3818, "step": 9735, "task_loss": 0.2765454947948456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4696386456489563, "epoch": 8.23, "learning_rate": 8.850380388841927e-06, "loss": 0.5069, "step": 9736, "task_loss": 0.48660120368003845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4835379719734192, "epoch": 8.23, "learning_rate": 8.846153846153847e-06, "loss": 0.4994, "step": 9737, "task_loss": 0.6464107036590576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21387115120887756, "epoch": 8.23, "learning_rate": 8.841927303465765e-06, "loss": 0.319, "step": 9738, "task_loss": 0.34822961688041687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3433699905872345, "epoch": 8.23, "learning_rate": 8.837700760777683e-06, "loss": 0.3185, "step": 9739, "task_loss": 0.9515796303749084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5184146761894226, "epoch": 8.23, "learning_rate": 8.833474218089603e-06, "loss": 0.5044, "step": 9740, "task_loss": 1.6453769207000732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4171653389930725, "epoch": 8.23, "learning_rate": 8.829247675401523e-06, "loss": 0.3496, "step": 9741, "task_loss": 0.282680869102478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32187342643737793, "epoch": 8.23, "learning_rate": 8.82502113271344e-06, "loss": 0.3367, "step": 9742, "task_loss": 0.8890480399131775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3497220575809479, "epoch": 8.24, "learning_rate": 8.820794590025359e-06, "loss": 0.3651, "step": 9743, "task_loss": 0.7869232892990112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42103883624076843, "epoch": 8.24, "learning_rate": 8.816568047337279e-06, "loss": 0.3192, "step": 9744, "task_loss": 0.5570975542068481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5558339953422546, "epoch": 8.24, "learning_rate": 8.812341504649198e-06, "loss": 0.3757, "step": 9745, "task_loss": 1.01534903049469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3667979836463928, "epoch": 8.24, "learning_rate": 8.808114961961116e-06, "loss": 0.4696, "step": 9746, "task_loss": 0.8238222002983093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3211827874183655, "epoch": 8.24, "learning_rate": 8.803888419273034e-06, "loss": 0.3532, "step": 9747, "task_loss": 0.29792577028274536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2905444800853729, "epoch": 8.24, "learning_rate": 8.799661876584954e-06, "loss": 0.3396, "step": 9748, "task_loss": 0.5318219661712646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26383304595947266, "epoch": 8.24, "learning_rate": 8.795435333896874e-06, "loss": 0.3369, "step": 9749, "task_loss": 0.9646943807601929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2157078981399536, "epoch": 8.24, "learning_rate": 8.791208791208792e-06, "loss": 0.2285, "step": 9750, "task_loss": 0.3005932867527008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4590211510658264, "epoch": 8.24, "learning_rate": 8.78698224852071e-06, "loss": 0.4223, "step": 9751, "task_loss": 0.9414132237434387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2220081239938736, "epoch": 8.24, "learning_rate": 8.78275570583263e-06, "loss": 0.3047, "step": 9752, "task_loss": 0.19204185903072357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3985971510410309, "epoch": 8.24, "learning_rate": 8.778529163144548e-06, "loss": 0.3543, "step": 9753, "task_loss": 0.2390933483839035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30922242999076843, "epoch": 8.24, "learning_rate": 8.774302620456468e-06, "loss": 0.3615, "step": 9754, "task_loss": 0.23663471639156342 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4071299433708191, "epoch": 8.25, "learning_rate": 8.770076077768386e-06, "loss": 0.4329, "step": 9755, "task_loss": 0.7447525858879089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41912806034088135, "epoch": 8.25, "learning_rate": 8.765849535080305e-06, "loss": 0.3616, "step": 9756, "task_loss": 0.19413790106773376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34253525733947754, "epoch": 8.25, "learning_rate": 8.761622992392223e-06, "loss": 0.404, "step": 9757, "task_loss": 0.7007615566253662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4242688715457916, "epoch": 8.25, "learning_rate": 8.757396449704143e-06, "loss": 0.3737, "step": 9758, "task_loss": 0.6549898982048035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24502715468406677, "epoch": 8.25, "learning_rate": 8.753169907016061e-06, "loss": 0.3152, "step": 9759, "task_loss": 0.6109122037887573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15375986695289612, "epoch": 8.25, "learning_rate": 8.74894336432798e-06, "loss": 0.3072, "step": 9760, "task_loss": 0.29879170656204224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4089164137840271, "epoch": 8.25, "learning_rate": 8.744716821639899e-06, "loss": 0.3149, "step": 9761, "task_loss": 0.6299025416374207 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.386701762676239, "epoch": 8.25, "learning_rate": 8.740490278951819e-06, "loss": 0.414, "step": 9762, "task_loss": 0.2943950891494751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19215984642505646, "epoch": 8.25, "learning_rate": 8.736263736263737e-06, "loss": 0.3614, "step": 9763, "task_loss": 0.6087113618850708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28922000527381897, "epoch": 8.25, "learning_rate": 8.732037193575655e-06, "loss": 0.3482, "step": 9764, "task_loss": 0.4135283827781677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40279993414878845, "epoch": 8.25, "learning_rate": 8.727810650887574e-06, "loss": 0.3107, "step": 9765, "task_loss": 0.5130314230918884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3185824155807495, "epoch": 8.26, "learning_rate": 8.723584108199494e-06, "loss": 0.4994, "step": 9766, "task_loss": 0.2831957936286926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2042769491672516, "epoch": 8.26, "learning_rate": 8.719357565511412e-06, "loss": 0.3691, "step": 9767, "task_loss": 0.3684547245502472 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27875420451164246, "epoch": 8.26, "learning_rate": 8.71513102282333e-06, "loss": 0.379, "step": 9768, "task_loss": 0.7846376895904541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4328498840332031, "epoch": 8.26, "learning_rate": 8.71090448013525e-06, "loss": 0.5883, "step": 9769, "task_loss": 0.4839381277561188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49871253967285156, "epoch": 8.26, "learning_rate": 8.70667793744717e-06, "loss": 0.4333, "step": 9770, "task_loss": 0.3765486478805542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36395588517189026, "epoch": 8.26, "learning_rate": 8.702451394759086e-06, "loss": 0.3618, "step": 9771, "task_loss": 0.21075135469436646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40310561656951904, "epoch": 8.26, "learning_rate": 8.698224852071006e-06, "loss": 0.4033, "step": 9772, "task_loss": 0.6758021712303162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24684014916419983, "epoch": 8.26, "learning_rate": 8.693998309382926e-06, "loss": 0.3499, "step": 9773, "task_loss": 0.10768115520477295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28706932067871094, "epoch": 8.26, "learning_rate": 8.689771766694844e-06, "loss": 0.3335, "step": 9774, "task_loss": 0.1497696489095688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36292895674705505, "epoch": 8.26, "learning_rate": 8.685545224006762e-06, "loss": 0.3654, "step": 9775, "task_loss": 0.5850411653518677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4508146643638611, "epoch": 8.26, "learning_rate": 8.681318681318681e-06, "loss": 0.3083, "step": 9776, "task_loss": 1.4609848260879517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34915974736213684, "epoch": 8.26, "learning_rate": 8.677092138630601e-06, "loss": 0.4138, "step": 9777, "task_loss": 0.7426593899726868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.615005373954773, "epoch": 8.27, "learning_rate": 8.67286559594252e-06, "loss": 0.4669, "step": 9778, "task_loss": 0.573525071144104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17758822441101074, "epoch": 8.27, "learning_rate": 8.668639053254437e-06, "loss": 0.3033, "step": 9779, "task_loss": 0.8016296625137329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16171646118164062, "epoch": 8.27, "learning_rate": 8.664412510566357e-06, "loss": 0.3355, "step": 9780, "task_loss": 0.05476692318916321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44864290952682495, "epoch": 8.27, "learning_rate": 8.660185967878277e-06, "loss": 0.4514, "step": 9781, "task_loss": 1.1707797050476074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2376815378665924, "epoch": 8.27, "learning_rate": 8.655959425190195e-06, "loss": 0.3855, "step": 9782, "task_loss": 0.19468216598033905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3082500696182251, "epoch": 8.27, "learning_rate": 8.651732882502115e-06, "loss": 0.3407, "step": 9783, "task_loss": 0.6495285034179688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6310072541236877, "epoch": 8.27, "learning_rate": 8.647506339814033e-06, "loss": 0.4792, "step": 9784, "task_loss": 1.4811997413635254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33435362577438354, "epoch": 8.27, "learning_rate": 8.64327979712595e-06, "loss": 0.4131, "step": 9785, "task_loss": 0.7153709530830383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33940380811691284, "epoch": 8.27, "learning_rate": 8.63905325443787e-06, "loss": 0.3645, "step": 9786, "task_loss": 1.0010336637496948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35735416412353516, "epoch": 8.27, "learning_rate": 8.63482671174979e-06, "loss": 0.3711, "step": 9787, "task_loss": 0.38904234766960144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3098819851875305, "epoch": 8.27, "learning_rate": 8.630600169061708e-06, "loss": 0.329, "step": 9788, "task_loss": 0.8288509845733643 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36749595403671265, "epoch": 8.27, "learning_rate": 8.626373626373626e-06, "loss": 0.3569, "step": 9789, "task_loss": 0.582553505897522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45635986328125, "epoch": 8.28, "learning_rate": 8.622147083685546e-06, "loss": 0.2987, "step": 9790, "task_loss": 0.30947017669677734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2071198672056198, "epoch": 8.28, "learning_rate": 8.617920540997466e-06, "loss": 0.3581, "step": 9791, "task_loss": 0.6297392845153809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29579681158065796, "epoch": 8.28, "learning_rate": 8.613693998309382e-06, "loss": 0.2889, "step": 9792, "task_loss": 1.2318494319915771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4114759862422943, "epoch": 8.28, "learning_rate": 8.609467455621302e-06, "loss": 0.36, "step": 9793, "task_loss": 0.5111846923828125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44780099391937256, "epoch": 8.28, "learning_rate": 8.605240912933221e-06, "loss": 0.3257, "step": 9794, "task_loss": 0.6333122849464417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3183003067970276, "epoch": 8.28, "learning_rate": 8.601014370245141e-06, "loss": 0.272, "step": 9795, "task_loss": 0.6065946221351624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6599653363227844, "epoch": 8.28, "learning_rate": 8.596787827557058e-06, "loss": 0.4934, "step": 9796, "task_loss": 0.48435166478157043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41480788588523865, "epoch": 8.28, "learning_rate": 8.592561284868977e-06, "loss": 0.4061, "step": 9797, "task_loss": 1.2546712160110474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3400757312774658, "epoch": 8.28, "learning_rate": 8.588334742180897e-06, "loss": 0.2292, "step": 9798, "task_loss": 0.17973552644252777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28583288192749023, "epoch": 8.28, "learning_rate": 8.584108199492815e-06, "loss": 0.3209, "step": 9799, "task_loss": 0.3710036277770996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.9102689027786255, "epoch": 8.28, "learning_rate": 8.579881656804733e-06, "loss": 0.4699, "step": 9800, "task_loss": 0.6184324622154236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3006907105445862, "epoch": 8.28, "learning_rate": 8.575655114116653e-06, "loss": 0.309, "step": 9801, "task_loss": 0.6547111868858337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25536441802978516, "epoch": 8.29, "learning_rate": 8.571428571428573e-06, "loss": 0.3756, "step": 9802, "task_loss": 0.476369172334671 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19718025624752045, "epoch": 8.29, "learning_rate": 8.56720202874049e-06, "loss": 0.4194, "step": 9803, "task_loss": 0.5830941796302795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24336951971054077, "epoch": 8.29, "learning_rate": 8.562975486052409e-06, "loss": 0.2964, "step": 9804, "task_loss": 0.26734843850135803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27275145053863525, "epoch": 8.29, "learning_rate": 8.558748943364328e-06, "loss": 0.394, "step": 9805, "task_loss": 0.3465957045555115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25579655170440674, "epoch": 8.29, "learning_rate": 8.554522400676246e-06, "loss": 0.2906, "step": 9806, "task_loss": 0.732319712638855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.624285101890564, "epoch": 8.29, "learning_rate": 8.550295857988166e-06, "loss": 0.4672, "step": 9807, "task_loss": 0.6681028008460999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15036822855472565, "epoch": 8.29, "learning_rate": 8.546069315300084e-06, "loss": 0.2846, "step": 9808, "task_loss": 0.31999534368515015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4442770779132843, "epoch": 8.29, "learning_rate": 8.541842772612004e-06, "loss": 0.4437, "step": 9809, "task_loss": 0.9391199350357056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2138526886701584, "epoch": 8.29, "learning_rate": 8.537616229923922e-06, "loss": 0.2339, "step": 9810, "task_loss": 0.472034215927124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36865174770355225, "epoch": 8.29, "learning_rate": 8.533389687235842e-06, "loss": 0.2825, "step": 9811, "task_loss": 0.5916270017623901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18036213517189026, "epoch": 8.29, "learning_rate": 8.529163144547762e-06, "loss": 0.2932, "step": 9812, "task_loss": 0.1685146987438202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2766318917274475, "epoch": 8.29, "learning_rate": 8.52493660185968e-06, "loss": 0.3511, "step": 9813, "task_loss": 0.1905214786529541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.273795485496521, "epoch": 8.3, "learning_rate": 8.520710059171598e-06, "loss": 0.4107, "step": 9814, "task_loss": 0.695587158203125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18488679826259613, "epoch": 8.3, "learning_rate": 8.516483516483517e-06, "loss": 0.2454, "step": 9815, "task_loss": 0.07990279048681259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46626415848731995, "epoch": 8.3, "learning_rate": 8.512256973795437e-06, "loss": 0.3592, "step": 9816, "task_loss": 0.6682854890823364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.220209002494812, "epoch": 8.3, "learning_rate": 8.508030431107353e-06, "loss": 0.4499, "step": 9817, "task_loss": 0.30251544713974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18685534596443176, "epoch": 8.3, "learning_rate": 8.503803888419273e-06, "loss": 0.284, "step": 9818, "task_loss": 0.5266330242156982 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18420666456222534, "epoch": 8.3, "learning_rate": 8.499577345731193e-06, "loss": 0.3046, "step": 9819, "task_loss": 0.4369259476661682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23351049423217773, "epoch": 8.3, "learning_rate": 8.495350803043111e-06, "loss": 0.4464, "step": 9820, "task_loss": 0.788015604019165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40722036361694336, "epoch": 8.3, "learning_rate": 8.491124260355029e-06, "loss": 0.3683, "step": 9821, "task_loss": 1.1177533864974976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5063949823379517, "epoch": 8.3, "learning_rate": 8.486897717666949e-06, "loss": 0.416, "step": 9822, "task_loss": 1.0140175819396973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3399730622768402, "epoch": 8.3, "learning_rate": 8.482671174978868e-06, "loss": 0.3062, "step": 9823, "task_loss": 1.3542838096618652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44571805000305176, "epoch": 8.3, "learning_rate": 8.478444632290787e-06, "loss": 0.3215, "step": 9824, "task_loss": 1.4812110662460327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16501504182815552, "epoch": 8.3, "learning_rate": 8.474218089602705e-06, "loss": 0.3927, "step": 9825, "task_loss": 0.07019282877445221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6326087713241577, "epoch": 8.31, "learning_rate": 8.469991546914624e-06, "loss": 0.4385, "step": 9826, "task_loss": 0.4715222120285034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34353554248809814, "epoch": 8.31, "learning_rate": 8.465765004226544e-06, "loss": 0.2911, "step": 9827, "task_loss": 0.3344884216785431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45326435565948486, "epoch": 8.31, "learning_rate": 8.461538461538462e-06, "loss": 0.4698, "step": 9828, "task_loss": 1.0761221647262573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15374648571014404, "epoch": 8.31, "learning_rate": 8.45731191885038e-06, "loss": 0.2651, "step": 9829, "task_loss": 0.36588919162750244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4200078547000885, "epoch": 8.31, "learning_rate": 8.4530853761623e-06, "loss": 0.3407, "step": 9830, "task_loss": 0.9219998717308044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30679208040237427, "epoch": 8.31, "learning_rate": 8.448858833474218e-06, "loss": 0.4453, "step": 9831, "task_loss": 0.6855963468551636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5602876543998718, "epoch": 8.31, "learning_rate": 8.444632290786138e-06, "loss": 0.3933, "step": 9832, "task_loss": 0.6793924570083618 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3089636564254761, "epoch": 8.31, "learning_rate": 8.440405748098056e-06, "loss": 0.2913, "step": 9833, "task_loss": 0.6300331354141235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.598201334476471, "epoch": 8.31, "learning_rate": 8.436179205409975e-06, "loss": 0.4884, "step": 9834, "task_loss": 1.1318031549453735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42228835821151733, "epoch": 8.31, "learning_rate": 8.431952662721893e-06, "loss": 0.3413, "step": 9835, "task_loss": 0.6755003929138184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7260577082633972, "epoch": 8.31, "learning_rate": 8.427726120033813e-06, "loss": 0.5329, "step": 9836, "task_loss": 0.8321139216423035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4221124053001404, "epoch": 8.32, "learning_rate": 8.423499577345731e-06, "loss": 0.4089, "step": 9837, "task_loss": 0.7170254588127136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25226616859436035, "epoch": 8.32, "learning_rate": 8.41927303465765e-06, "loss": 0.3621, "step": 9838, "task_loss": 1.1626739501953125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36467647552490234, "epoch": 8.32, "learning_rate": 8.415046491969569e-06, "loss": 0.4305, "step": 9839, "task_loss": 0.4601993262767792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34807509183883667, "epoch": 8.32, "learning_rate": 8.410819949281489e-06, "loss": 0.332, "step": 9840, "task_loss": 1.4866724014282227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3440382778644562, "epoch": 8.32, "learning_rate": 8.406593406593407e-06, "loss": 0.2649, "step": 9841, "task_loss": 0.43716341257095337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17226433753967285, "epoch": 8.32, "learning_rate": 8.402366863905325e-06, "loss": 0.383, "step": 9842, "task_loss": 0.30462706089019775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27353745698928833, "epoch": 8.32, "learning_rate": 8.398140321217245e-06, "loss": 0.3829, "step": 9843, "task_loss": 1.0205519199371338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36124366521835327, "epoch": 8.32, "learning_rate": 8.393913778529164e-06, "loss": 0.4015, "step": 9844, "task_loss": 0.7810443639755249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3139660358428955, "epoch": 8.32, "learning_rate": 8.389687235841082e-06, "loss": 0.3712, "step": 9845, "task_loss": 0.5591665506362915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2383313775062561, "epoch": 8.32, "learning_rate": 8.385460693153e-06, "loss": 0.343, "step": 9846, "task_loss": 0.4618859887123108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4592198133468628, "epoch": 8.32, "learning_rate": 8.38123415046492e-06, "loss": 0.3406, "step": 9847, "task_loss": 0.802821695804596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.355598121881485, "epoch": 8.32, "learning_rate": 8.37700760777684e-06, "loss": 0.2888, "step": 9848, "task_loss": 0.3750664293766022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4725492000579834, "epoch": 8.33, "learning_rate": 8.372781065088758e-06, "loss": 0.3488, "step": 9849, "task_loss": 0.9732497334480286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48520582914352417, "epoch": 8.33, "learning_rate": 8.368554522400676e-06, "loss": 0.3849, "step": 9850, "task_loss": 1.0766435861587524 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19553996622562408, "epoch": 8.33, "learning_rate": 8.364327979712596e-06, "loss": 0.2692, "step": 9851, "task_loss": 0.5403403043746948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26956072449684143, "epoch": 8.33, "learning_rate": 8.360101437024514e-06, "loss": 0.3846, "step": 9852, "task_loss": 0.29928338527679443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5034545063972473, "epoch": 8.33, "learning_rate": 8.355874894336434e-06, "loss": 0.4389, "step": 9853, "task_loss": 0.850502073764801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.655781090259552, "epoch": 8.33, "learning_rate": 8.351648351648352e-06, "loss": 0.3631, "step": 9854, "task_loss": 0.6750800013542175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27420270442962646, "epoch": 8.33, "learning_rate": 8.347421808960271e-06, "loss": 0.3808, "step": 9855, "task_loss": 0.6062148213386536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29507923126220703, "epoch": 8.33, "learning_rate": 8.34319526627219e-06, "loss": 0.2839, "step": 9856, "task_loss": 0.12409472465515137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39879393577575684, "epoch": 8.33, "learning_rate": 8.338968723584109e-06, "loss": 0.4118, "step": 9857, "task_loss": 0.5695974230766296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38156503438949585, "epoch": 8.33, "learning_rate": 8.334742180896027e-06, "loss": 0.3743, "step": 9858, "task_loss": 0.6405352354049683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6437473297119141, "epoch": 8.33, "learning_rate": 8.330515638207947e-06, "loss": 0.412, "step": 9859, "task_loss": 0.5421026349067688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1625746786594391, "epoch": 8.33, "learning_rate": 8.326289095519865e-06, "loss": 0.4397, "step": 9860, "task_loss": 0.24758104979991913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33773335814476013, "epoch": 8.34, "learning_rate": 8.322062552831785e-06, "loss": 0.452, "step": 9861, "task_loss": 0.028131060302257538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.404288649559021, "epoch": 8.34, "learning_rate": 8.317836010143703e-06, "loss": 0.5131, "step": 9862, "task_loss": 0.759227454662323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5208569765090942, "epoch": 8.34, "learning_rate": 8.31360946745562e-06, "loss": 0.399, "step": 9863, "task_loss": 0.7569478154182434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31458181142807007, "epoch": 8.34, "learning_rate": 8.30938292476754e-06, "loss": 0.4308, "step": 9864, "task_loss": 0.4518139362335205 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5279165506362915, "epoch": 8.34, "learning_rate": 8.30515638207946e-06, "loss": 0.4692, "step": 9865, "task_loss": 0.3020053505897522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27899041771888733, "epoch": 8.34, "learning_rate": 8.300929839391378e-06, "loss": 0.5797, "step": 9866, "task_loss": 0.6417058110237122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23574316501617432, "epoch": 8.34, "learning_rate": 8.296703296703296e-06, "loss": 0.369, "step": 9867, "task_loss": 0.2814878523349762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3385157883167267, "epoch": 8.34, "learning_rate": 8.292476754015216e-06, "loss": 0.3535, "step": 9868, "task_loss": 0.19284862279891968 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.337044358253479, "epoch": 8.34, "learning_rate": 8.288250211327136e-06, "loss": 0.3136, "step": 9869, "task_loss": 0.25778377056121826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38773009181022644, "epoch": 8.34, "learning_rate": 8.284023668639054e-06, "loss": 0.3432, "step": 9870, "task_loss": 0.5943514108657837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4057808518409729, "epoch": 8.34, "learning_rate": 8.279797125950972e-06, "loss": 0.3282, "step": 9871, "task_loss": 0.13423751294612885 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47784048318862915, "epoch": 8.34, "learning_rate": 8.275570583262892e-06, "loss": 0.3641, "step": 9872, "task_loss": 0.3588549792766571 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3793264627456665, "epoch": 8.35, "learning_rate": 8.27134404057481e-06, "loss": 0.5567, "step": 9873, "task_loss": 0.07439304143190384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4812926948070526, "epoch": 8.35, "learning_rate": 8.26711749788673e-06, "loss": 0.4656, "step": 9874, "task_loss": 1.0317496061325073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3727072477340698, "epoch": 8.35, "learning_rate": 8.262890955198647e-06, "loss": 0.4308, "step": 9875, "task_loss": 1.0501117706298828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29243168234825134, "epoch": 8.35, "learning_rate": 8.258664412510567e-06, "loss": 0.311, "step": 9876, "task_loss": 0.05869528278708458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29091677069664, "epoch": 8.35, "learning_rate": 8.254437869822485e-06, "loss": 0.5337, "step": 9877, "task_loss": 0.3071219027042389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37691131234169006, "epoch": 8.35, "learning_rate": 8.250211327134405e-06, "loss": 0.4297, "step": 9878, "task_loss": 0.5585564374923706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5153303146362305, "epoch": 8.35, "learning_rate": 8.245984784446323e-06, "loss": 0.3111, "step": 9879, "task_loss": 0.35851961374282837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29009193181991577, "epoch": 8.35, "learning_rate": 8.241758241758243e-06, "loss": 0.3515, "step": 9880, "task_loss": 0.05839831382036209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4133725166320801, "epoch": 8.35, "learning_rate": 8.23753169907016e-06, "loss": 0.3446, "step": 9881, "task_loss": 0.6248162984848022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4489487409591675, "epoch": 8.35, "learning_rate": 8.23330515638208e-06, "loss": 0.3599, "step": 9882, "task_loss": 0.5395919680595398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49348533153533936, "epoch": 8.35, "learning_rate": 8.229078613693999e-06, "loss": 0.4575, "step": 9883, "task_loss": 0.8853915929794312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3440357744693756, "epoch": 8.35, "learning_rate": 8.224852071005917e-06, "loss": 0.4448, "step": 9884, "task_loss": 0.2521446645259857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5212501883506775, "epoch": 8.36, "learning_rate": 8.220625528317836e-06, "loss": 0.3489, "step": 9885, "task_loss": 0.8031014204025269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40825706720352173, "epoch": 8.36, "learning_rate": 8.216398985629756e-06, "loss": 0.3054, "step": 9886, "task_loss": 0.4760590195655823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30783775448799133, "epoch": 8.36, "learning_rate": 8.212172442941674e-06, "loss": 0.3707, "step": 9887, "task_loss": 1.3281574249267578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4840999245643616, "epoch": 8.36, "learning_rate": 8.207945900253592e-06, "loss": 0.4185, "step": 9888, "task_loss": 0.356241375207901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40270790457725525, "epoch": 8.36, "learning_rate": 8.203719357565512e-06, "loss": 0.3681, "step": 9889, "task_loss": 0.49908339977264404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1393299549818039, "epoch": 8.36, "learning_rate": 8.199492814877432e-06, "loss": 0.397, "step": 9890, "task_loss": 0.5084965825080872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33766070008277893, "epoch": 8.36, "learning_rate": 8.19526627218935e-06, "loss": 0.3042, "step": 9891, "task_loss": 0.6804378628730774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2858445942401886, "epoch": 8.36, "learning_rate": 8.191039729501268e-06, "loss": 0.2528, "step": 9892, "task_loss": 0.2925701141357422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28024572134017944, "epoch": 8.36, "learning_rate": 8.186813186813188e-06, "loss": 0.4699, "step": 9893, "task_loss": 0.3370833694934845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39834460616111755, "epoch": 8.36, "learning_rate": 8.182586644125107e-06, "loss": 0.3797, "step": 9894, "task_loss": 0.37989887595176697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2483496069908142, "epoch": 8.36, "learning_rate": 8.178360101437024e-06, "loss": 0.3141, "step": 9895, "task_loss": 0.4723920226097107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23568809032440186, "epoch": 8.36, "learning_rate": 8.174133558748943e-06, "loss": 0.289, "step": 9896, "task_loss": 0.501482367515564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2683965265750885, "epoch": 8.37, "learning_rate": 8.169907016060863e-06, "loss": 0.3067, "step": 9897, "task_loss": 0.10031570494174957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19225439429283142, "epoch": 8.37, "learning_rate": 8.165680473372781e-06, "loss": 0.3742, "step": 9898, "task_loss": 0.2847628891468048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47012925148010254, "epoch": 8.37, "learning_rate": 8.161453930684701e-06, "loss": 0.4484, "step": 9899, "task_loss": 0.2704078257083893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2899574041366577, "epoch": 8.37, "learning_rate": 8.157227387996619e-06, "loss": 0.4418, "step": 9900, "task_loss": 0.8580310940742493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4211597442626953, "epoch": 8.37, "learning_rate": 8.153000845308539e-06, "loss": 0.3541, "step": 9901, "task_loss": 1.182708501815796 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6386754512786865, "epoch": 8.37, "learning_rate": 8.148774302620457e-06, "loss": 0.5302, "step": 9902, "task_loss": 0.18620972335338593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26958057284355164, "epoch": 8.37, "learning_rate": 8.144547759932376e-06, "loss": 0.4203, "step": 9903, "task_loss": 0.14039455354213715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44917744398117065, "epoch": 8.37, "learning_rate": 8.140321217244294e-06, "loss": 0.468, "step": 9904, "task_loss": 0.6625657677650452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23699799180030823, "epoch": 8.37, "learning_rate": 8.136094674556213e-06, "loss": 0.2832, "step": 9905, "task_loss": 0.49559539556503296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4702160358428955, "epoch": 8.37, "learning_rate": 8.131868131868132e-06, "loss": 0.4044, "step": 9906, "task_loss": 0.49343767762184143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3203650414943695, "epoch": 8.37, "learning_rate": 8.127641589180052e-06, "loss": 0.4482, "step": 9907, "task_loss": 0.1452435553073883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39907383918762207, "epoch": 8.38, "learning_rate": 8.12341504649197e-06, "loss": 0.3453, "step": 9908, "task_loss": 0.8481168746948242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17972984910011292, "epoch": 8.38, "learning_rate": 8.119188503803888e-06, "loss": 0.3998, "step": 9909, "task_loss": 0.48957115411758423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18523091077804565, "epoch": 8.38, "learning_rate": 8.114961961115808e-06, "loss": 0.4575, "step": 9910, "task_loss": 0.3024050295352936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3940434455871582, "epoch": 8.38, "learning_rate": 8.110735418427728e-06, "loss": 0.4355, "step": 9911, "task_loss": 0.4449685215950012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47976329922676086, "epoch": 8.38, "learning_rate": 8.106508875739646e-06, "loss": 0.4465, "step": 9912, "task_loss": 1.381178855895996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4957450330257416, "epoch": 8.38, "learning_rate": 8.102282333051564e-06, "loss": 0.4842, "step": 9913, "task_loss": 0.246063232421875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17261847853660583, "epoch": 8.38, "learning_rate": 8.098055790363483e-06, "loss": 0.399, "step": 9914, "task_loss": 0.31979015469551086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22802722454071045, "epoch": 8.38, "learning_rate": 8.093829247675403e-06, "loss": 0.3671, "step": 9915, "task_loss": 0.12243936955928802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.370728999376297, "epoch": 8.38, "learning_rate": 8.08960270498732e-06, "loss": 0.3505, "step": 9916, "task_loss": 0.6238110661506653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17308086156845093, "epoch": 8.38, "learning_rate": 8.08537616229924e-06, "loss": 0.3311, "step": 9917, "task_loss": 0.4537776708602905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35523146390914917, "epoch": 8.38, "learning_rate": 8.081149619611159e-06, "loss": 0.5825, "step": 9918, "task_loss": 0.9502268433570862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6385256052017212, "epoch": 8.38, "learning_rate": 8.076923076923077e-06, "loss": 0.4117, "step": 9919, "task_loss": 0.5215417742729187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4543847143650055, "epoch": 8.39, "learning_rate": 8.072696534234995e-06, "loss": 0.4833, "step": 9920, "task_loss": 0.31832101941108704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18559032678604126, "epoch": 8.39, "learning_rate": 8.068469991546915e-06, "loss": 0.3185, "step": 9921, "task_loss": 0.17780497670173645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2549199163913727, "epoch": 8.39, "learning_rate": 8.064243448858835e-06, "loss": 0.4007, "step": 9922, "task_loss": 0.40377604961395264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22342194616794586, "epoch": 8.39, "learning_rate": 8.060016906170753e-06, "loss": 0.262, "step": 9923, "task_loss": 0.1295715868473053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20527736842632294, "epoch": 8.39, "learning_rate": 8.05579036348267e-06, "loss": 0.3797, "step": 9924, "task_loss": 0.5065737366676331 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.637683093547821, "epoch": 8.39, "learning_rate": 8.05156382079459e-06, "loss": 0.3786, "step": 9925, "task_loss": 0.7894318103790283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3687111735343933, "epoch": 8.39, "learning_rate": 8.04733727810651e-06, "loss": 0.4023, "step": 9926, "task_loss": 0.5464559197425842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46690598130226135, "epoch": 8.39, "learning_rate": 8.043110735418428e-06, "loss": 0.5103, "step": 9927, "task_loss": 0.630386471748352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32390865683555603, "epoch": 8.39, "learning_rate": 8.038884192730348e-06, "loss": 0.4016, "step": 9928, "task_loss": 1.035956859588623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3263307213783264, "epoch": 8.39, "learning_rate": 8.034657650042266e-06, "loss": 0.4079, "step": 9929, "task_loss": 0.5266216397285461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3923707902431488, "epoch": 8.39, "learning_rate": 8.030431107354184e-06, "loss": 0.4381, "step": 9930, "task_loss": 0.757575511932373 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3148203194141388, "epoch": 8.39, "learning_rate": 8.026204564666104e-06, "loss": 0.3, "step": 9931, "task_loss": 0.3477681279182434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22264418005943298, "epoch": 8.4, "learning_rate": 8.021978021978023e-06, "loss": 0.3898, "step": 9932, "task_loss": 0.5065764784812927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24075189232826233, "epoch": 8.4, "learning_rate": 8.017751479289941e-06, "loss": 0.3037, "step": 9933, "task_loss": 0.5321162939071655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20382162928581238, "epoch": 8.4, "learning_rate": 8.01352493660186e-06, "loss": 0.3014, "step": 9934, "task_loss": 0.3911793529987335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17122229933738708, "epoch": 8.4, "learning_rate": 8.00929839391378e-06, "loss": 0.2796, "step": 9935, "task_loss": 1.2716217041015625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25934481620788574, "epoch": 8.4, "learning_rate": 8.005071851225699e-06, "loss": 0.3991, "step": 9936, "task_loss": 0.1272660493850708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2740709185600281, "epoch": 8.4, "learning_rate": 8.000845308537615e-06, "loss": 0.2342, "step": 9937, "task_loss": 0.39149194955825806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.255855917930603, "epoch": 8.4, "learning_rate": 7.996618765849535e-06, "loss": 0.3114, "step": 9938, "task_loss": 0.4888307452201843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4605828523635864, "epoch": 8.4, "learning_rate": 7.992392223161455e-06, "loss": 0.3685, "step": 9939, "task_loss": 0.39939507842063904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3568412661552429, "epoch": 8.4, "learning_rate": 7.988165680473373e-06, "loss": 0.2936, "step": 9940, "task_loss": 0.23691995441913605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40177398920059204, "epoch": 8.4, "learning_rate": 7.983939137785291e-06, "loss": 0.4462, "step": 9941, "task_loss": 0.7156034111976624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2976890206336975, "epoch": 8.4, "learning_rate": 7.97971259509721e-06, "loss": 0.3531, "step": 9942, "task_loss": 0.7515323758125305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49165937304496765, "epoch": 8.4, "learning_rate": 7.97548605240913e-06, "loss": 0.3915, "step": 9943, "task_loss": 0.3210039734840393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5673562288284302, "epoch": 8.41, "learning_rate": 7.971259509721048e-06, "loss": 0.4346, "step": 9944, "task_loss": 1.0063607692718506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26934075355529785, "epoch": 8.41, "learning_rate": 7.967032967032966e-06, "loss": 0.2816, "step": 9945, "task_loss": 0.17124764621257782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20996598899364471, "epoch": 8.41, "learning_rate": 7.962806424344886e-06, "loss": 0.3992, "step": 9946, "task_loss": 0.28219518065452576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4716126620769501, "epoch": 8.41, "learning_rate": 7.958579881656806e-06, "loss": 0.4462, "step": 9947, "task_loss": 0.7046942114830017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2334522157907486, "epoch": 8.41, "learning_rate": 7.954353338968724e-06, "loss": 0.3614, "step": 9948, "task_loss": 0.060270264744758606 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2787330448627472, "epoch": 8.41, "learning_rate": 7.950126796280642e-06, "loss": 0.3452, "step": 9949, "task_loss": 1.1330339908599854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20659777522087097, "epoch": 8.41, "learning_rate": 7.945900253592562e-06, "loss": 0.356, "step": 9950, "task_loss": 0.28001347184181213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44068729877471924, "epoch": 8.41, "learning_rate": 7.94167371090448e-06, "loss": 0.3424, "step": 9951, "task_loss": 0.7366507053375244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35117146372795105, "epoch": 8.41, "learning_rate": 7.9374471682164e-06, "loss": 0.3334, "step": 9952, "task_loss": 0.7413483262062073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3230150640010834, "epoch": 8.41, "learning_rate": 7.933220625528318e-06, "loss": 0.2723, "step": 9953, "task_loss": 0.3673068881034851 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6200671792030334, "epoch": 8.41, "learning_rate": 7.928994082840237e-06, "loss": 0.5178, "step": 9954, "task_loss": 0.9971244931221008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24222692847251892, "epoch": 8.41, "learning_rate": 7.924767540152155e-06, "loss": 0.2965, "step": 9955, "task_loss": 0.6877383589744568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35239019989967346, "epoch": 8.42, "learning_rate": 7.920540997464075e-06, "loss": 0.3727, "step": 9956, "task_loss": 0.45825764536857605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4563480615615845, "epoch": 8.42, "learning_rate": 7.916314454775993e-06, "loss": 0.3432, "step": 9957, "task_loss": 0.7692358493804932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20112793147563934, "epoch": 8.42, "learning_rate": 7.912087912087913e-06, "loss": 0.4285, "step": 9958, "task_loss": 0.7713651657104492 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4392203688621521, "epoch": 8.42, "learning_rate": 7.907861369399831e-06, "loss": 0.2882, "step": 9959, "task_loss": 0.7245163321495056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22501957416534424, "epoch": 8.42, "learning_rate": 7.90363482671175e-06, "loss": 0.3645, "step": 9960, "task_loss": 0.03207080438733101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24949821829795837, "epoch": 8.42, "learning_rate": 7.89940828402367e-06, "loss": 0.3796, "step": 9961, "task_loss": 0.1074836477637291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3178539574146271, "epoch": 8.42, "learning_rate": 7.895181741335587e-06, "loss": 0.3421, "step": 9962, "task_loss": 0.363930344581604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45336586236953735, "epoch": 8.42, "learning_rate": 7.890955198647507e-06, "loss": 0.4061, "step": 9963, "task_loss": 1.1299898624420166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5575181841850281, "epoch": 8.42, "learning_rate": 7.886728655959426e-06, "loss": 0.4031, "step": 9964, "task_loss": 0.864539384841919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21565748751163483, "epoch": 8.42, "learning_rate": 7.882502113271344e-06, "loss": 0.3359, "step": 9965, "task_loss": 0.5902684926986694 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23953759670257568, "epoch": 8.42, "learning_rate": 7.878275570583262e-06, "loss": 0.3289, "step": 9966, "task_loss": 0.4212299585342407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4753879904747009, "epoch": 8.42, "learning_rate": 7.874049027895182e-06, "loss": 0.4192, "step": 9967, "task_loss": 0.6777235865592957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3934580683708191, "epoch": 8.43, "learning_rate": 7.869822485207102e-06, "loss": 0.4313, "step": 9968, "task_loss": 1.122774362564087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4727416932582855, "epoch": 8.43, "learning_rate": 7.86559594251902e-06, "loss": 0.3283, "step": 9969, "task_loss": 0.6974933743476868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3496593236923218, "epoch": 8.43, "learning_rate": 7.861369399830938e-06, "loss": 0.3877, "step": 9970, "task_loss": 0.6283162236213684 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36651259660720825, "epoch": 8.43, "learning_rate": 7.857142857142858e-06, "loss": 0.3055, "step": 9971, "task_loss": 0.4463374614715576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27455392479896545, "epoch": 8.43, "learning_rate": 7.852916314454776e-06, "loss": 0.4034, "step": 9972, "task_loss": 0.4421502947807312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4017808735370636, "epoch": 8.43, "learning_rate": 7.848689771766695e-06, "loss": 0.4118, "step": 9973, "task_loss": 1.015561580657959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18567708134651184, "epoch": 8.43, "learning_rate": 7.844463229078613e-06, "loss": 0.3941, "step": 9974, "task_loss": 0.30658280849456787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22371090948581696, "epoch": 8.43, "learning_rate": 7.840236686390533e-06, "loss": 0.2463, "step": 9975, "task_loss": 0.5453804731369019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5094899535179138, "epoch": 8.43, "learning_rate": 7.836010143702451e-06, "loss": 0.3741, "step": 9976, "task_loss": 0.7566254138946533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4013715386390686, "epoch": 8.43, "learning_rate": 7.831783601014371e-06, "loss": 0.3908, "step": 9977, "task_loss": 1.00264310836792 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33096060156822205, "epoch": 8.43, "learning_rate": 7.827557058326289e-06, "loss": 0.3425, "step": 9978, "task_loss": 0.8430969715118408 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4888285994529724, "epoch": 8.44, "learning_rate": 7.823330515638209e-06, "loss": 0.4875, "step": 9979, "task_loss": 1.28627347946167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.535581111907959, "epoch": 8.44, "learning_rate": 7.819103972950127e-06, "loss": 0.5124, "step": 9980, "task_loss": 0.49604836106300354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.255129337310791, "epoch": 8.44, "learning_rate": 7.814877430262047e-06, "loss": 0.3107, "step": 9981, "task_loss": 0.24262180924415588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32798492908477783, "epoch": 8.44, "learning_rate": 7.810650887573965e-06, "loss": 0.4304, "step": 9982, "task_loss": 0.7916689515113831 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31929513812065125, "epoch": 8.44, "learning_rate": 7.806424344885883e-06, "loss": 0.3273, "step": 9983, "task_loss": 0.6244804263114929 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.286630243062973, "epoch": 8.44, "learning_rate": 7.802197802197802e-06, "loss": 0.3666, "step": 9984, "task_loss": 0.3419410288333893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2809467315673828, "epoch": 8.44, "learning_rate": 7.797971259509722e-06, "loss": 0.4896, "step": 9985, "task_loss": 0.09554605185985565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3604429364204407, "epoch": 8.44, "learning_rate": 7.79374471682164e-06, "loss": 0.4541, "step": 9986, "task_loss": 0.3937378525733948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41663700342178345, "epoch": 8.44, "learning_rate": 7.789518174133558e-06, "loss": 0.4006, "step": 9987, "task_loss": 1.0851434469223022 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.291340708732605, "epoch": 8.44, "learning_rate": 7.785291631445478e-06, "loss": 0.3726, "step": 9988, "task_loss": 1.104375958442688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5594011545181274, "epoch": 8.44, "learning_rate": 7.781065088757398e-06, "loss": 0.3433, "step": 9989, "task_loss": 1.2137364149093628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.501724898815155, "epoch": 8.44, "learning_rate": 7.776838546069316e-06, "loss": 0.3373, "step": 9990, "task_loss": 0.6115052700042725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.291567325592041, "epoch": 8.45, "learning_rate": 7.772612003381234e-06, "loss": 0.3705, "step": 9991, "task_loss": 0.3286609649658203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24879682064056396, "epoch": 8.45, "learning_rate": 7.768385460693154e-06, "loss": 0.3386, "step": 9992, "task_loss": 0.15706300735473633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23902809619903564, "epoch": 8.45, "learning_rate": 7.764158918005073e-06, "loss": 0.4161, "step": 9993, "task_loss": 0.1779930591583252 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3532010316848755, "epoch": 8.45, "learning_rate": 7.759932375316991e-06, "loss": 0.4264, "step": 9994, "task_loss": 0.18645226955413818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49448999762535095, "epoch": 8.45, "learning_rate": 7.75570583262891e-06, "loss": 0.4334, "step": 9995, "task_loss": 1.6293988227844238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2907274663448334, "epoch": 8.45, "learning_rate": 7.751479289940829e-06, "loss": 0.349, "step": 9996, "task_loss": 0.24322834610939026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37505269050598145, "epoch": 8.45, "learning_rate": 7.747252747252747e-06, "loss": 0.411, "step": 9997, "task_loss": 0.5230420827865601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4134119153022766, "epoch": 8.45, "learning_rate": 7.743026204564667e-06, "loss": 0.3025, "step": 9998, "task_loss": 0.8234663009643555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42424696683883667, "epoch": 8.45, "learning_rate": 7.738799661876585e-06, "loss": 0.4191, "step": 9999, "task_loss": 1.0713717937469482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3065156936645508, "epoch": 8.45, "learning_rate": 7.734573119188505e-06, "loss": 0.3651, "step": 10000, "task_loss": 0.16009142994880676 }, { "epoch": 8.45, "eval_accuracy": 0.9165148514851486, "eval_loss": 0.2567279040813446, "eval_runtime": 225.5684, "eval_samples_per_second": 111.939, "eval_steps_per_second": 0.878, "step": 10000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15647852420806885, "epoch": 8.45, "learning_rate": 7.730346576500423e-06, "loss": 0.2385, "step": 10001, "task_loss": 0.3650796711444855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5962855219841003, "epoch": 8.45, "learning_rate": 7.726120033812342e-06, "loss": 0.4114, "step": 10002, "task_loss": 0.23950029909610748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35707521438598633, "epoch": 8.46, "learning_rate": 7.72189349112426e-06, "loss": 0.3885, "step": 10003, "task_loss": 0.5751855969429016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26414790749549866, "epoch": 8.46, "learning_rate": 7.717666948436179e-06, "loss": 0.2771, "step": 10004, "task_loss": 0.16733896732330322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20213139057159424, "epoch": 8.46, "learning_rate": 7.713440405748098e-06, "loss": 0.4707, "step": 10005, "task_loss": 0.26585355401039124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21714413166046143, "epoch": 8.46, "learning_rate": 7.709213863060018e-06, "loss": 0.2975, "step": 10006, "task_loss": 0.45038193464279175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3212604224681854, "epoch": 8.46, "learning_rate": 7.704987320371936e-06, "loss": 0.388, "step": 10007, "task_loss": 0.30243954062461853 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30845028162002563, "epoch": 8.46, "learning_rate": 7.700760777683854e-06, "loss": 0.4041, "step": 10008, "task_loss": 0.374865859746933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37851595878601074, "epoch": 8.46, "learning_rate": 7.696534234995774e-06, "loss": 0.3195, "step": 10009, "task_loss": 0.3934018611907959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3625902235507965, "epoch": 8.46, "learning_rate": 7.692307692307694e-06, "loss": 0.3909, "step": 10010, "task_loss": 0.5277790427207947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41369467973709106, "epoch": 8.46, "learning_rate": 7.688081149619612e-06, "loss": 0.3433, "step": 10011, "task_loss": 0.5595895648002625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44314971566200256, "epoch": 8.46, "learning_rate": 7.68385460693153e-06, "loss": 0.3874, "step": 10012, "task_loss": 0.7153849601745605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42098793387413025, "epoch": 8.46, "learning_rate": 7.67962806424345e-06, "loss": 0.3927, "step": 10013, "task_loss": 1.7886065244674683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3648926615715027, "epoch": 8.46, "learning_rate": 7.67540152155537e-06, "loss": 0.32, "step": 10014, "task_loss": 0.6187127232551575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24024340510368347, "epoch": 8.47, "learning_rate": 7.671174978867286e-06, "loss": 0.433, "step": 10015, "task_loss": 0.5889041423797607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3975982666015625, "epoch": 8.47, "learning_rate": 7.666948436179205e-06, "loss": 0.3824, "step": 10016, "task_loss": 0.09880679845809937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.454243928194046, "epoch": 8.47, "learning_rate": 7.662721893491125e-06, "loss": 0.4064, "step": 10017, "task_loss": 1.050737977027893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2888142466545105, "epoch": 8.47, "learning_rate": 7.658495350803043e-06, "loss": 0.3487, "step": 10018, "task_loss": 0.5387918949127197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23971499502658844, "epoch": 8.47, "learning_rate": 7.654268808114963e-06, "loss": 0.3212, "step": 10019, "task_loss": 0.3706704080104828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2744256854057312, "epoch": 8.47, "learning_rate": 7.65004226542688e-06, "loss": 0.3616, "step": 10020, "task_loss": 0.0930965319275856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3478437662124634, "epoch": 8.47, "learning_rate": 7.6458157227388e-06, "loss": 0.3272, "step": 10021, "task_loss": 0.7598109841346741 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19099076092243195, "epoch": 8.47, "learning_rate": 7.641589180050719e-06, "loss": 0.2626, "step": 10022, "task_loss": 0.0723000094294548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30972692370414734, "epoch": 8.47, "learning_rate": 7.637362637362638e-06, "loss": 0.371, "step": 10023, "task_loss": 1.400354266166687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4075191020965576, "epoch": 8.47, "learning_rate": 7.633136094674556e-06, "loss": 0.2953, "step": 10024, "task_loss": 0.5419731140136719 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38037964701652527, "epoch": 8.47, "learning_rate": 7.628909551986475e-06, "loss": 0.4287, "step": 10025, "task_loss": 0.7014669179916382 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6216515302658081, "epoch": 8.47, "learning_rate": 7.624683009298394e-06, "loss": 0.4726, "step": 10026, "task_loss": 0.3705052137374878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.503270149230957, "epoch": 8.48, "learning_rate": 7.620456466610314e-06, "loss": 0.3937, "step": 10027, "task_loss": 1.0167361497879028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3390476703643799, "epoch": 8.48, "learning_rate": 7.616229923922231e-06, "loss": 0.3597, "step": 10028, "task_loss": 0.6971657276153564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5268529653549194, "epoch": 8.48, "learning_rate": 7.612003381234151e-06, "loss": 0.4051, "step": 10029, "task_loss": 0.8590385317802429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21674463152885437, "epoch": 8.48, "learning_rate": 7.60777683854607e-06, "loss": 0.3354, "step": 10030, "task_loss": 0.4922611117362976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5444332361221313, "epoch": 8.48, "learning_rate": 7.6035502958579895e-06, "loss": 0.3865, "step": 10031, "task_loss": 0.7442688345909119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3745216131210327, "epoch": 8.48, "learning_rate": 7.599323753169907e-06, "loss": 0.4549, "step": 10032, "task_loss": 0.4452337622642517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43300506472587585, "epoch": 8.48, "learning_rate": 7.595097210481826e-06, "loss": 0.3777, "step": 10033, "task_loss": 0.28494003415107727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3483816981315613, "epoch": 8.48, "learning_rate": 7.590870667793745e-06, "loss": 0.3578, "step": 10034, "task_loss": 0.3203364908695221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47657787799835205, "epoch": 8.48, "learning_rate": 7.586644125105664e-06, "loss": 0.3299, "step": 10035, "task_loss": 0.43723198771476746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3334798812866211, "epoch": 8.48, "learning_rate": 7.582417582417582e-06, "loss": 0.4036, "step": 10036, "task_loss": 0.727150559425354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2927389442920685, "epoch": 8.48, "learning_rate": 7.578191039729501e-06, "loss": 0.3914, "step": 10037, "task_loss": 0.9308177828788757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.81629478931427, "epoch": 8.48, "learning_rate": 7.573964497041421e-06, "loss": 0.6058, "step": 10038, "task_loss": 0.6574467420578003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40065625309944153, "epoch": 8.49, "learning_rate": 7.56973795435334e-06, "loss": 0.3675, "step": 10039, "task_loss": 1.2913084030151367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4693346619606018, "epoch": 8.49, "learning_rate": 7.565511411665258e-06, "loss": 0.3927, "step": 10040, "task_loss": 0.15798111259937286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47506117820739746, "epoch": 8.49, "learning_rate": 7.561284868977177e-06, "loss": 0.3319, "step": 10041, "task_loss": 0.47026804089546204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3603743314743042, "epoch": 8.49, "learning_rate": 7.557058326289096e-06, "loss": 0.285, "step": 10042, "task_loss": 0.9409157037734985 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24001289904117584, "epoch": 8.49, "learning_rate": 7.552831783601015e-06, "loss": 0.3443, "step": 10043, "task_loss": 0.6532590985298157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.558394193649292, "epoch": 8.49, "learning_rate": 7.5486052409129325e-06, "loss": 0.3767, "step": 10044, "task_loss": 0.9818922877311707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5583081841468811, "epoch": 8.49, "learning_rate": 7.544378698224852e-06, "loss": 0.4201, "step": 10045, "task_loss": 0.6369345784187317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38640353083610535, "epoch": 8.49, "learning_rate": 7.540152155536771e-06, "loss": 0.3307, "step": 10046, "task_loss": 0.5349611639976501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5335134267807007, "epoch": 8.49, "learning_rate": 7.535925612848691e-06, "loss": 0.4176, "step": 10047, "task_loss": 0.8648219108581543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28317153453826904, "epoch": 8.49, "learning_rate": 7.53169907016061e-06, "loss": 0.3537, "step": 10048, "task_loss": 0.8494557738304138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3306199014186859, "epoch": 8.49, "learning_rate": 7.527472527472528e-06, "loss": 0.3661, "step": 10049, "task_loss": 0.04323067143559456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2506070137023926, "epoch": 8.5, "learning_rate": 7.523245984784447e-06, "loss": 0.3677, "step": 10050, "task_loss": 0.24322061240673065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.314693808555603, "epoch": 8.5, "learning_rate": 7.519019442096366e-06, "loss": 0.3821, "step": 10051, "task_loss": 0.4480632543563843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28280672430992126, "epoch": 8.5, "learning_rate": 7.514792899408285e-06, "loss": 0.3338, "step": 10052, "task_loss": 0.5980005264282227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3044377565383911, "epoch": 8.5, "learning_rate": 7.5105663567202025e-06, "loss": 0.414, "step": 10053, "task_loss": 0.8885044455528259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36480021476745605, "epoch": 8.5, "learning_rate": 7.506339814032122e-06, "loss": 0.3915, "step": 10054, "task_loss": 0.13071505725383759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3219485282897949, "epoch": 8.5, "learning_rate": 7.502113271344041e-06, "loss": 0.3008, "step": 10055, "task_loss": 0.8692150115966797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22662855684757233, "epoch": 8.5, "learning_rate": 7.49788672865596e-06, "loss": 0.3045, "step": 10056, "task_loss": 0.3828751742839813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.300873726606369, "epoch": 8.5, "learning_rate": 7.493660185967878e-06, "loss": 0.344, "step": 10057, "task_loss": 0.4453399181365967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3980865478515625, "epoch": 8.5, "learning_rate": 7.489433643279797e-06, "loss": 0.4711, "step": 10058, "task_loss": 0.7516759634017944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48048147559165955, "epoch": 8.5, "learning_rate": 7.485207100591717e-06, "loss": 0.3139, "step": 10059, "task_loss": 0.9466328620910645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30237358808517456, "epoch": 8.5, "learning_rate": 7.480980557903636e-06, "loss": 0.453, "step": 10060, "task_loss": 0.1238284558057785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3572167158126831, "epoch": 8.5, "learning_rate": 7.476754015215554e-06, "loss": 0.3489, "step": 10061, "task_loss": 0.7669247984886169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40110349655151367, "epoch": 8.51, "learning_rate": 7.4725274725274726e-06, "loss": 0.4407, "step": 10062, "task_loss": 0.8436221480369568 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3704720437526703, "epoch": 8.51, "learning_rate": 7.468300929839392e-06, "loss": 0.3647, "step": 10063, "task_loss": 0.4320041835308075 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17296141386032104, "epoch": 8.51, "learning_rate": 7.464074387151311e-06, "loss": 0.4688, "step": 10064, "task_loss": 0.8824068903923035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34670549631118774, "epoch": 8.51, "learning_rate": 7.459847844463229e-06, "loss": 0.4117, "step": 10065, "task_loss": 1.4002689123153687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2216339111328125, "epoch": 8.51, "learning_rate": 7.455621301775148e-06, "loss": 0.3675, "step": 10066, "task_loss": 0.14401815831661224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30027759075164795, "epoch": 8.51, "learning_rate": 7.451394759087067e-06, "loss": 0.4478, "step": 10067, "task_loss": 0.7981740236282349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4449779987335205, "epoch": 8.51, "learning_rate": 7.447168216398987e-06, "loss": 0.424, "step": 10068, "task_loss": 0.6555307507514954 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38975387811660767, "epoch": 8.51, "learning_rate": 7.442941673710904e-06, "loss": 0.3291, "step": 10069, "task_loss": 0.4789889454841614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2745557725429535, "epoch": 8.51, "learning_rate": 7.438715131022824e-06, "loss": 0.2907, "step": 10070, "task_loss": 0.28161874413490295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2654450535774231, "epoch": 8.51, "learning_rate": 7.434488588334743e-06, "loss": 0.4382, "step": 10071, "task_loss": 0.31980106234550476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21384823322296143, "epoch": 8.51, "learning_rate": 7.4302620456466615e-06, "loss": 0.2253, "step": 10072, "task_loss": 0.36582908034324646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41527092456817627, "epoch": 8.51, "learning_rate": 7.4260355029585795e-06, "loss": 0.48, "step": 10073, "task_loss": 0.47583556175231934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35797134041786194, "epoch": 8.52, "learning_rate": 7.421808960270498e-06, "loss": 0.4755, "step": 10074, "task_loss": 1.0336451530456543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5484205484390259, "epoch": 8.52, "learning_rate": 7.417582417582418e-06, "loss": 0.6047, "step": 10075, "task_loss": 1.4436770677566528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22701247036457062, "epoch": 8.52, "learning_rate": 7.413355874894337e-06, "loss": 0.4339, "step": 10076, "task_loss": 0.4193384051322937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3061693608760834, "epoch": 8.52, "learning_rate": 7.409129332206256e-06, "loss": 0.4007, "step": 10077, "task_loss": 0.4975164830684662 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32204893231391907, "epoch": 8.52, "learning_rate": 7.404902789518174e-06, "loss": 0.3968, "step": 10078, "task_loss": 0.49848952889442444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3851049840450287, "epoch": 8.52, "learning_rate": 7.400676246830094e-06, "loss": 0.349, "step": 10079, "task_loss": 0.5062575340270996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3190443813800812, "epoch": 8.52, "learning_rate": 7.396449704142013e-06, "loss": 0.3175, "step": 10080, "task_loss": 1.091103434562683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.278963565826416, "epoch": 8.52, "learning_rate": 7.3922231614539315e-06, "loss": 0.3282, "step": 10081, "task_loss": 0.2635341286659241 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3126611113548279, "epoch": 8.52, "learning_rate": 7.3879966187658495e-06, "loss": 0.3452, "step": 10082, "task_loss": 0.663848876953125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41551756858825684, "epoch": 8.52, "learning_rate": 7.3837700760777684e-06, "loss": 0.4359, "step": 10083, "task_loss": 0.37026870250701904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34381431341171265, "epoch": 8.52, "learning_rate": 7.379543533389688e-06, "loss": 0.4286, "step": 10084, "task_loss": 1.0224076509475708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22305767238140106, "epoch": 8.52, "learning_rate": 7.375316990701607e-06, "loss": 0.3554, "step": 10085, "task_loss": 0.10315810889005661 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38133835792541504, "epoch": 8.53, "learning_rate": 7.371090448013525e-06, "loss": 0.4172, "step": 10086, "task_loss": 0.5190756320953369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3535868525505066, "epoch": 8.53, "learning_rate": 7.366863905325444e-06, "loss": 0.4403, "step": 10087, "task_loss": 1.0829070806503296 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2347486913204193, "epoch": 8.53, "learning_rate": 7.362637362637363e-06, "loss": 0.3283, "step": 10088, "task_loss": 0.8213214874267578 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21849556267261505, "epoch": 8.53, "learning_rate": 7.358410819949283e-06, "loss": 0.4049, "step": 10089, "task_loss": 0.8195666074752808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46921515464782715, "epoch": 8.53, "learning_rate": 7.3541842772612e-06, "loss": 0.3611, "step": 10090, "task_loss": 0.3248871862888336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3546496331691742, "epoch": 8.53, "learning_rate": 7.3499577345731196e-06, "loss": 0.3851, "step": 10091, "task_loss": 0.21330218017101288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19309554994106293, "epoch": 8.53, "learning_rate": 7.3457311918850385e-06, "loss": 0.3724, "step": 10092, "task_loss": 0.10121327638626099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5482072234153748, "epoch": 8.53, "learning_rate": 7.341504649196957e-06, "loss": 0.4397, "step": 10093, "task_loss": 1.7027041912078857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29318809509277344, "epoch": 8.53, "learning_rate": 7.337278106508875e-06, "loss": 0.3756, "step": 10094, "task_loss": 1.165995478630066 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5020350217819214, "epoch": 8.53, "learning_rate": 7.333051563820795e-06, "loss": 0.3411, "step": 10095, "task_loss": 0.48852604627609253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2605220377445221, "epoch": 8.53, "learning_rate": 7.328825021132714e-06, "loss": 0.3025, "step": 10096, "task_loss": 0.4516128599643707 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31064432859420776, "epoch": 8.53, "learning_rate": 7.324598478444633e-06, "loss": 0.406, "step": 10097, "task_loss": 0.7675235271453857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4627359211444855, "epoch": 8.54, "learning_rate": 7.320371935756551e-06, "loss": 0.4223, "step": 10098, "task_loss": 1.088073968887329 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23597946763038635, "epoch": 8.54, "learning_rate": 7.31614539306847e-06, "loss": 0.3262, "step": 10099, "task_loss": 0.31622835993766785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43826401233673096, "epoch": 8.54, "learning_rate": 7.31191885038039e-06, "loss": 0.4123, "step": 10100, "task_loss": 0.23514068126678467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4982466697692871, "epoch": 8.54, "learning_rate": 7.3076923076923085e-06, "loss": 0.5006, "step": 10101, "task_loss": 0.4700887203216553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34766313433647156, "epoch": 8.54, "learning_rate": 7.3034657650042265e-06, "loss": 0.4419, "step": 10102, "task_loss": 0.4832528829574585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3392738997936249, "epoch": 8.54, "learning_rate": 7.299239222316145e-06, "loss": 0.4125, "step": 10103, "task_loss": 0.4250722825527191 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29427361488342285, "epoch": 8.54, "learning_rate": 7.295012679628064e-06, "loss": 0.3331, "step": 10104, "task_loss": 0.8826963901519775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24012696743011475, "epoch": 8.54, "learning_rate": 7.290786136939984e-06, "loss": 0.3213, "step": 10105, "task_loss": 0.5127995014190674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28859108686447144, "epoch": 8.54, "learning_rate": 7.286559594251901e-06, "loss": 0.4814, "step": 10106, "task_loss": 0.22149032354354858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5038437247276306, "epoch": 8.54, "learning_rate": 7.282333051563821e-06, "loss": 0.4396, "step": 10107, "task_loss": 0.7255251407623291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.437401682138443, "epoch": 8.54, "learning_rate": 7.27810650887574e-06, "loss": 0.3674, "step": 10108, "task_loss": 0.8912453651428223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34648585319519043, "epoch": 8.54, "learning_rate": 7.273879966187659e-06, "loss": 0.3254, "step": 10109, "task_loss": 0.6494439244270325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4895307719707489, "epoch": 8.55, "learning_rate": 7.2696534234995785e-06, "loss": 0.3736, "step": 10110, "task_loss": 0.7184210419654846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5675989985466003, "epoch": 8.55, "learning_rate": 7.2654268808114966e-06, "loss": 0.3738, "step": 10111, "task_loss": 0.5886959433555603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5565800666809082, "epoch": 8.55, "learning_rate": 7.2612003381234154e-06, "loss": 0.4729, "step": 10112, "task_loss": 0.5893681645393372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38432931900024414, "epoch": 8.55, "learning_rate": 7.256973795435334e-06, "loss": 0.395, "step": 10113, "task_loss": 0.38745537400245667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5947434902191162, "epoch": 8.55, "learning_rate": 7.252747252747254e-06, "loss": 0.4227, "step": 10114, "task_loss": 1.39102041721344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39933276176452637, "epoch": 8.55, "learning_rate": 7.248520710059171e-06, "loss": 0.3518, "step": 10115, "task_loss": 1.1552631855010986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19872742891311646, "epoch": 8.55, "learning_rate": 7.244294167371091e-06, "loss": 0.3189, "step": 10116, "task_loss": 0.47361767292022705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21623800694942474, "epoch": 8.55, "learning_rate": 7.24006762468301e-06, "loss": 0.3268, "step": 10117, "task_loss": 0.727571964263916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19654525816440582, "epoch": 8.55, "learning_rate": 7.235841081994929e-06, "loss": 0.3962, "step": 10118, "task_loss": 0.011826390400528908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.387768030166626, "epoch": 8.55, "learning_rate": 7.231614539306847e-06, "loss": 0.3388, "step": 10119, "task_loss": 0.20608526468276978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3727930188179016, "epoch": 8.55, "learning_rate": 7.227387996618766e-06, "loss": 0.2993, "step": 10120, "task_loss": 0.3519766628742218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5103909969329834, "epoch": 8.56, "learning_rate": 7.2231614539306855e-06, "loss": 0.4435, "step": 10121, "task_loss": 0.8131870627403259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2915813624858856, "epoch": 8.56, "learning_rate": 7.218934911242604e-06, "loss": 0.3638, "step": 10122, "task_loss": 0.4115925431251526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2068556398153305, "epoch": 8.56, "learning_rate": 7.214708368554522e-06, "loss": 0.2987, "step": 10123, "task_loss": 1.0224032402038574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.468778133392334, "epoch": 8.56, "learning_rate": 7.210481825866441e-06, "loss": 0.333, "step": 10124, "task_loss": 0.5400423407554626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23247352242469788, "epoch": 8.56, "learning_rate": 7.20625528317836e-06, "loss": 0.2747, "step": 10125, "task_loss": 0.3630013167858124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27808061242103577, "epoch": 8.56, "learning_rate": 7.20202874049028e-06, "loss": 0.3306, "step": 10126, "task_loss": 0.6817033290863037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2409682720899582, "epoch": 8.56, "learning_rate": 7.197802197802198e-06, "loss": 0.3599, "step": 10127, "task_loss": 0.1748778074979782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.498413622379303, "epoch": 8.56, "learning_rate": 7.193575655114117e-06, "loss": 0.3817, "step": 10128, "task_loss": 0.21646666526794434 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3146403431892395, "epoch": 8.56, "learning_rate": 7.189349112426036e-06, "loss": 0.3904, "step": 10129, "task_loss": 0.11147406697273254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48959246277809143, "epoch": 8.56, "learning_rate": 7.1851225697379555e-06, "loss": 0.465, "step": 10130, "task_loss": 0.6751667261123657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5285665392875671, "epoch": 8.56, "learning_rate": 7.180896027049873e-06, "loss": 0.4119, "step": 10131, "task_loss": 0.6807984709739685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.487068235874176, "epoch": 8.56, "learning_rate": 7.1766694843617924e-06, "loss": 0.3189, "step": 10132, "task_loss": 0.868852972984314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39783355593681335, "epoch": 8.57, "learning_rate": 7.172442941673711e-06, "loss": 0.3746, "step": 10133, "task_loss": 0.6925401091575623 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2802753448486328, "epoch": 8.57, "learning_rate": 7.16821639898563e-06, "loss": 0.3745, "step": 10134, "task_loss": 0.31753256916999817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2982405424118042, "epoch": 8.57, "learning_rate": 7.163989856297548e-06, "loss": 0.4042, "step": 10135, "task_loss": 0.5780658721923828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31185731291770935, "epoch": 8.57, "learning_rate": 7.159763313609467e-06, "loss": 0.367, "step": 10136, "task_loss": 0.4677661061286926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2681635022163391, "epoch": 8.57, "learning_rate": 7.155536770921387e-06, "loss": 0.4478, "step": 10137, "task_loss": 0.3334159851074219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6161929368972778, "epoch": 8.57, "learning_rate": 7.151310228233306e-06, "loss": 0.4481, "step": 10138, "task_loss": 1.0833162069320679 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33256983757019043, "epoch": 8.57, "learning_rate": 7.147083685545225e-06, "loss": 0.3505, "step": 10139, "task_loss": 0.48805513978004456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3421381115913391, "epoch": 8.57, "learning_rate": 7.142857142857143e-06, "loss": 0.479, "step": 10140, "task_loss": 0.16572847962379456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24007548391819, "epoch": 8.57, "learning_rate": 7.138630600169062e-06, "loss": 0.3268, "step": 10141, "task_loss": 0.21054160594940186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20671992003917694, "epoch": 8.57, "learning_rate": 7.134404057480981e-06, "loss": 0.1864, "step": 10142, "task_loss": 0.22633452713489532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2945789098739624, "epoch": 8.57, "learning_rate": 7.1301775147929e-06, "loss": 0.3734, "step": 10143, "task_loss": 1.0199781656265259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24160727858543396, "epoch": 8.57, "learning_rate": 7.125950972104818e-06, "loss": 0.2778, "step": 10144, "task_loss": 0.14128464460372925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21019797027111053, "epoch": 8.58, "learning_rate": 7.121724429416737e-06, "loss": 0.3023, "step": 10145, "task_loss": 0.46944016218185425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18696178495883942, "epoch": 8.58, "learning_rate": 7.117497886728657e-06, "loss": 0.3682, "step": 10146, "task_loss": 0.18120352923870087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4178585410118103, "epoch": 8.58, "learning_rate": 7.113271344040576e-06, "loss": 0.3425, "step": 10147, "task_loss": 0.8405928611755371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.13897255063056946, "epoch": 8.58, "learning_rate": 7.109044801352494e-06, "loss": 0.3161, "step": 10148, "task_loss": 0.39884504675865173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4695075750350952, "epoch": 8.58, "learning_rate": 7.104818258664413e-06, "loss": 0.4271, "step": 10149, "task_loss": 0.6146615147590637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4751514196395874, "epoch": 8.58, "learning_rate": 7.100591715976332e-06, "loss": 0.3442, "step": 10150, "task_loss": 0.41544121503829956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2777957320213318, "epoch": 8.58, "learning_rate": 7.096365173288251e-06, "loss": 0.3404, "step": 10151, "task_loss": 0.110404372215271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48751333355903625, "epoch": 8.58, "learning_rate": 7.0921386306001686e-06, "loss": 0.422, "step": 10152, "task_loss": 0.20621347427368164 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3942243456840515, "epoch": 8.58, "learning_rate": 7.087912087912088e-06, "loss": 0.3859, "step": 10153, "task_loss": 0.6490058898925781 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2008228600025177, "epoch": 8.58, "learning_rate": 7.083685545224007e-06, "loss": 0.3257, "step": 10154, "task_loss": 0.05771105736494064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4592174291610718, "epoch": 8.58, "learning_rate": 7.079459002535926e-06, "loss": 0.4414, "step": 10155, "task_loss": 0.6298792958259583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37410280108451843, "epoch": 8.58, "learning_rate": 7.075232459847844e-06, "loss": 0.4035, "step": 10156, "task_loss": 0.5367450714111328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19028428196907043, "epoch": 8.59, "learning_rate": 7.071005917159763e-06, "loss": 0.2886, "step": 10157, "task_loss": 0.4632282555103302 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3145682215690613, "epoch": 8.59, "learning_rate": 7.066779374471683e-06, "loss": 0.2593, "step": 10158, "task_loss": 0.26095959544181824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29716023802757263, "epoch": 8.59, "learning_rate": 7.062552831783602e-06, "loss": 0.4543, "step": 10159, "task_loss": 0.47853729128837585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2286374717950821, "epoch": 8.59, "learning_rate": 7.05832628909552e-06, "loss": 0.3965, "step": 10160, "task_loss": 0.5313234925270081 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3271198868751526, "epoch": 8.59, "learning_rate": 7.054099746407439e-06, "loss": 0.4234, "step": 10161, "task_loss": 0.2500517964363098 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3360799252986908, "epoch": 8.59, "learning_rate": 7.049873203719358e-06, "loss": 0.3892, "step": 10162, "task_loss": 0.6327769756317139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3912489414215088, "epoch": 8.59, "learning_rate": 7.045646661031277e-06, "loss": 0.3504, "step": 10163, "task_loss": 1.0093562602996826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33680081367492676, "epoch": 8.59, "learning_rate": 7.041420118343195e-06, "loss": 0.3318, "step": 10164, "task_loss": 0.3986647427082062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31090137362480164, "epoch": 8.59, "learning_rate": 7.037193575655114e-06, "loss": 0.3545, "step": 10165, "task_loss": 0.5555381774902344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31631386280059814, "epoch": 8.59, "learning_rate": 7.032967032967033e-06, "loss": 0.315, "step": 10166, "task_loss": 0.12015603482723236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35853099822998047, "epoch": 8.59, "learning_rate": 7.028740490278953e-06, "loss": 0.3235, "step": 10167, "task_loss": 0.3177771270275116 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3126335144042969, "epoch": 8.59, "learning_rate": 7.024513947590872e-06, "loss": 0.3528, "step": 10168, "task_loss": 0.542525589466095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32863014936447144, "epoch": 8.6, "learning_rate": 7.02028740490279e-06, "loss": 0.3917, "step": 10169, "task_loss": 0.47550228238105774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3473438024520874, "epoch": 8.6, "learning_rate": 7.016060862214709e-06, "loss": 0.4131, "step": 10170, "task_loss": 1.3606388568878174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3230997323989868, "epoch": 8.6, "learning_rate": 7.0118343195266275e-06, "loss": 0.3996, "step": 10171, "task_loss": 0.33820974826812744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2735862135887146, "epoch": 8.6, "learning_rate": 7.007607776838547e-06, "loss": 0.4653, "step": 10172, "task_loss": 0.4941348135471344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28236478567123413, "epoch": 8.6, "learning_rate": 7.0033812341504644e-06, "loss": 0.4992, "step": 10173, "task_loss": 0.38389989733695984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3128219246864319, "epoch": 8.6, "learning_rate": 6.999154691462384e-06, "loss": 0.3308, "step": 10174, "task_loss": 0.6091203093528748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36828166246414185, "epoch": 8.6, "learning_rate": 6.994928148774303e-06, "loss": 0.3239, "step": 10175, "task_loss": 0.44792771339416504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47807371616363525, "epoch": 8.6, "learning_rate": 6.990701606086222e-06, "loss": 0.4251, "step": 10176, "task_loss": 0.5433327555656433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6082231998443604, "epoch": 8.6, "learning_rate": 6.98647506339814e-06, "loss": 0.5098, "step": 10177, "task_loss": 0.3901612460613251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3770318031311035, "epoch": 8.6, "learning_rate": 6.98224852071006e-06, "loss": 0.425, "step": 10178, "task_loss": 0.48897072672843933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25661683082580566, "epoch": 8.6, "learning_rate": 6.978021978021979e-06, "loss": 0.4231, "step": 10179, "task_loss": 0.032284293323755264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2921563684940338, "epoch": 8.6, "learning_rate": 6.9737954353338975e-06, "loss": 0.4419, "step": 10180, "task_loss": 0.6852441430091858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3942301869392395, "epoch": 8.61, "learning_rate": 6.9695688926458156e-06, "loss": 0.4107, "step": 10181, "task_loss": 0.35574039816856384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5849505066871643, "epoch": 8.61, "learning_rate": 6.9653423499577345e-06, "loss": 0.4988, "step": 10182, "task_loss": 0.8089973330497742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19590267539024353, "epoch": 8.61, "learning_rate": 6.961115807269654e-06, "loss": 0.3233, "step": 10183, "task_loss": 0.08618414402008057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24896256625652313, "epoch": 8.61, "learning_rate": 6.956889264581573e-06, "loss": 0.3564, "step": 10184, "task_loss": 0.8752989768981934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5699318647384644, "epoch": 8.61, "learning_rate": 6.952662721893491e-06, "loss": 0.3834, "step": 10185, "task_loss": 0.4956744313240051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2537305951118469, "epoch": 8.61, "learning_rate": 6.94843617920541e-06, "loss": 0.3133, "step": 10186, "task_loss": 0.3263333737850189 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5843814611434937, "epoch": 8.61, "learning_rate": 6.944209636517329e-06, "loss": 0.4289, "step": 10187, "task_loss": 0.4522673487663269 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3710978925228119, "epoch": 8.61, "learning_rate": 6.939983093829249e-06, "loss": 0.4026, "step": 10188, "task_loss": 1.0115045309066772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37507009506225586, "epoch": 8.61, "learning_rate": 6.935756551141166e-06, "loss": 0.287, "step": 10189, "task_loss": 0.6558169722557068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3815043866634369, "epoch": 8.61, "learning_rate": 6.931530008453086e-06, "loss": 0.3995, "step": 10190, "task_loss": 1.0059436559677124 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6301705241203308, "epoch": 8.61, "learning_rate": 6.9273034657650045e-06, "loss": 0.3305, "step": 10191, "task_loss": 0.4730185270309448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32997334003448486, "epoch": 8.61, "learning_rate": 6.923076923076923e-06, "loss": 0.3405, "step": 10192, "task_loss": 0.1453944742679596 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4705065190792084, "epoch": 8.62, "learning_rate": 6.9188503803888414e-06, "loss": 0.4816, "step": 10193, "task_loss": 0.8967434763908386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.321374773979187, "epoch": 8.62, "learning_rate": 6.914623837700761e-06, "loss": 0.3652, "step": 10194, "task_loss": 0.278384268283844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5251705646514893, "epoch": 8.62, "learning_rate": 6.91039729501268e-06, "loss": 0.5152, "step": 10195, "task_loss": 0.4618043005466461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36276179552078247, "epoch": 8.62, "learning_rate": 6.906170752324599e-06, "loss": 0.4296, "step": 10196, "task_loss": 0.34141767024993896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32338857650756836, "epoch": 8.62, "learning_rate": 6.901944209636519e-06, "loss": 0.4033, "step": 10197, "task_loss": 0.3074069917201996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38566267490386963, "epoch": 8.62, "learning_rate": 6.897717666948436e-06, "loss": 0.4666, "step": 10198, "task_loss": 0.5800369381904602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35965853929519653, "epoch": 8.62, "learning_rate": 6.893491124260356e-06, "loss": 0.3819, "step": 10199, "task_loss": 0.3991096317768097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21260502934455872, "epoch": 8.62, "learning_rate": 6.8892645815722745e-06, "loss": 0.3631, "step": 10200, "task_loss": 0.5688323378562927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1918761432170868, "epoch": 8.62, "learning_rate": 6.885038038884193e-06, "loss": 0.2916, "step": 10201, "task_loss": 0.4770876467227936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2169959396123886, "epoch": 8.62, "learning_rate": 6.8808114961961115e-06, "loss": 0.3478, "step": 10202, "task_loss": 0.8480307459831238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23685654997825623, "epoch": 8.62, "learning_rate": 6.87658495350803e-06, "loss": 0.3525, "step": 10203, "task_loss": 0.1132311001420021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41936078667640686, "epoch": 8.63, "learning_rate": 6.87235841081995e-06, "loss": 0.3639, "step": 10204, "task_loss": 0.4771448075771332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2836070656776428, "epoch": 8.63, "learning_rate": 6.868131868131869e-06, "loss": 0.33, "step": 10205, "task_loss": 0.5479159355163574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20466679334640503, "epoch": 8.63, "learning_rate": 6.863905325443787e-06, "loss": 0.235, "step": 10206, "task_loss": 0.08658955991268158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23490110039710999, "epoch": 8.63, "learning_rate": 6.859678782755706e-06, "loss": 0.4458, "step": 10207, "task_loss": 0.36428141593933105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33005669713020325, "epoch": 8.63, "learning_rate": 6.855452240067625e-06, "loss": 0.376, "step": 10208, "task_loss": 0.5652322173118591 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3751472234725952, "epoch": 8.63, "learning_rate": 6.8512256973795445e-06, "loss": 0.331, "step": 10209, "task_loss": 0.47269508242607117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32046210765838623, "epoch": 8.63, "learning_rate": 6.846999154691463e-06, "loss": 0.4281, "step": 10210, "task_loss": 0.6363443732261658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6729910969734192, "epoch": 8.63, "learning_rate": 6.8427726120033815e-06, "loss": 0.4304, "step": 10211, "task_loss": 1.261833667755127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3313552141189575, "epoch": 8.63, "learning_rate": 6.8385460693153e-06, "loss": 0.4255, "step": 10212, "task_loss": 0.9893395304679871 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2731635570526123, "epoch": 8.63, "learning_rate": 6.83431952662722e-06, "loss": 0.2797, "step": 10213, "task_loss": 0.5395263433456421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3885250687599182, "epoch": 8.63, "learning_rate": 6.830092983939137e-06, "loss": 0.3402, "step": 10214, "task_loss": 0.6025234460830688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3497835695743561, "epoch": 8.63, "learning_rate": 6.825866441251057e-06, "loss": 0.4138, "step": 10215, "task_loss": 0.965369462966919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30603545904159546, "epoch": 8.64, "learning_rate": 6.821639898562976e-06, "loss": 0.4118, "step": 10216, "task_loss": 0.7757984399795532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3154016137123108, "epoch": 8.64, "learning_rate": 6.817413355874895e-06, "loss": 0.4509, "step": 10217, "task_loss": 1.183670997619629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.501570999622345, "epoch": 8.64, "learning_rate": 6.813186813186813e-06, "loss": 0.3618, "step": 10218, "task_loss": 0.5592185854911804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1721823811531067, "epoch": 8.64, "learning_rate": 6.808960270498732e-06, "loss": 0.3182, "step": 10219, "task_loss": 0.014799512922763824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2652093470096588, "epoch": 8.64, "learning_rate": 6.8047337278106515e-06, "loss": 0.3558, "step": 10220, "task_loss": 0.7648015022277832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4824661910533905, "epoch": 8.64, "learning_rate": 6.80050718512257e-06, "loss": 0.3913, "step": 10221, "task_loss": 1.0660936832427979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37593889236450195, "epoch": 8.64, "learning_rate": 6.7962806424344884e-06, "loss": 0.4496, "step": 10222, "task_loss": 0.5146792531013489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2139626443386078, "epoch": 8.64, "learning_rate": 6.792054099746407e-06, "loss": 0.3522, "step": 10223, "task_loss": 0.42508557438850403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.10494861751794815, "epoch": 8.64, "learning_rate": 6.787827557058326e-06, "loss": 0.249, "step": 10224, "task_loss": 0.009125777520239353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3527168333530426, "epoch": 8.64, "learning_rate": 6.783601014370246e-06, "loss": 0.3523, "step": 10225, "task_loss": 0.14795461297035217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20544445514678955, "epoch": 8.64, "learning_rate": 6.779374471682165e-06, "loss": 0.3696, "step": 10226, "task_loss": 0.18496811389923096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31022948026657104, "epoch": 8.64, "learning_rate": 6.775147928994083e-06, "loss": 0.3727, "step": 10227, "task_loss": 0.8266865611076355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42576247453689575, "epoch": 8.65, "learning_rate": 6.770921386306002e-06, "loss": 0.3534, "step": 10228, "task_loss": 0.3835059404373169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2530650794506073, "epoch": 8.65, "learning_rate": 6.7666948436179215e-06, "loss": 0.3331, "step": 10229, "task_loss": 0.32775741815567017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49093538522720337, "epoch": 8.65, "learning_rate": 6.76246830092984e-06, "loss": 0.4462, "step": 10230, "task_loss": 0.4900144040584564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.509456992149353, "epoch": 8.65, "learning_rate": 6.7582417582417585e-06, "loss": 0.4056, "step": 10231, "task_loss": 0.5201508402824402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14230309426784515, "epoch": 8.65, "learning_rate": 6.754015215553677e-06, "loss": 0.3252, "step": 10232, "task_loss": 0.36747416853904724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40386271476745605, "epoch": 8.65, "learning_rate": 6.749788672865596e-06, "loss": 0.4036, "step": 10233, "task_loss": 0.6796191334724426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5331763625144958, "epoch": 8.65, "learning_rate": 6.745562130177516e-06, "loss": 0.4447, "step": 10234, "task_loss": 0.568722128868103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38653236627578735, "epoch": 8.65, "learning_rate": 6.741335587489433e-06, "loss": 0.4303, "step": 10235, "task_loss": 1.2374249696731567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2432292401790619, "epoch": 8.65, "learning_rate": 6.737109044801353e-06, "loss": 0.3554, "step": 10236, "task_loss": 0.8909711837768555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38222235441207886, "epoch": 8.65, "learning_rate": 6.732882502113272e-06, "loss": 0.3212, "step": 10237, "task_loss": 0.07854944467544556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3896022439002991, "epoch": 8.65, "learning_rate": 6.728655959425191e-06, "loss": 0.3791, "step": 10238, "task_loss": 0.8703241348266602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26854413747787476, "epoch": 8.65, "learning_rate": 6.724429416737109e-06, "loss": 0.4316, "step": 10239, "task_loss": 0.8514972925186157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2054835706949234, "epoch": 8.66, "learning_rate": 6.720202874049028e-06, "loss": 0.4011, "step": 10240, "task_loss": 1.0571436882019043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4221191108226776, "epoch": 8.66, "learning_rate": 6.715976331360947e-06, "loss": 0.3571, "step": 10241, "task_loss": 0.048402491956949234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19214095175266266, "epoch": 8.66, "learning_rate": 6.711749788672866e-06, "loss": 0.2677, "step": 10242, "task_loss": 0.14545761048793793 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2103838324546814, "epoch": 8.66, "learning_rate": 6.707523245984784e-06, "loss": 0.3149, "step": 10243, "task_loss": 0.058164045214653015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34784746170043945, "epoch": 8.66, "learning_rate": 6.703296703296703e-06, "loss": 0.4367, "step": 10244, "task_loss": 0.32112735509872437 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.465974897146225, "epoch": 8.66, "learning_rate": 6.699070160608623e-06, "loss": 0.346, "step": 10245, "task_loss": 0.41615378856658936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.319076806306839, "epoch": 8.66, "learning_rate": 6.694843617920542e-06, "loss": 0.4378, "step": 10246, "task_loss": 0.4854883849620819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36962783336639404, "epoch": 8.66, "learning_rate": 6.69061707523246e-06, "loss": 0.5306, "step": 10247, "task_loss": 1.4593161344528198 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44404715299606323, "epoch": 8.66, "learning_rate": 6.686390532544379e-06, "loss": 0.4139, "step": 10248, "task_loss": 0.8341437578201294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23270398378372192, "epoch": 8.66, "learning_rate": 6.682163989856298e-06, "loss": 0.3516, "step": 10249, "task_loss": 0.2919387221336365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4469876289367676, "epoch": 8.66, "learning_rate": 6.677937447168217e-06, "loss": 0.3963, "step": 10250, "task_loss": 1.6100282669067383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37517040967941284, "epoch": 8.66, "learning_rate": 6.673710904480135e-06, "loss": 0.3873, "step": 10251, "task_loss": 0.40287190675735474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40515023469924927, "epoch": 8.67, "learning_rate": 6.669484361792054e-06, "loss": 0.3603, "step": 10252, "task_loss": 0.9230337738990784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5311192274093628, "epoch": 8.67, "learning_rate": 6.665257819103973e-06, "loss": 0.4707, "step": 10253, "task_loss": 1.1787490844726562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2533622086048126, "epoch": 8.67, "learning_rate": 6.661031276415892e-06, "loss": 0.3364, "step": 10254, "task_loss": 0.40570521354675293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3649572432041168, "epoch": 8.67, "learning_rate": 6.656804733727812e-06, "loss": 0.3396, "step": 10255, "task_loss": 0.5936547517776489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4385209083557129, "epoch": 8.67, "learning_rate": 6.652578191039729e-06, "loss": 0.3883, "step": 10256, "task_loss": 0.09599509835243225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2012832760810852, "epoch": 8.67, "learning_rate": 6.648351648351649e-06, "loss": 0.3553, "step": 10257, "task_loss": 0.5136467218399048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2455396205186844, "epoch": 8.67, "learning_rate": 6.644125105663568e-06, "loss": 0.4163, "step": 10258, "task_loss": 0.25033038854599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33249711990356445, "epoch": 8.67, "learning_rate": 6.639898562975487e-06, "loss": 0.3978, "step": 10259, "task_loss": 0.16501210629940033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23528704047203064, "epoch": 8.67, "learning_rate": 6.635672020287405e-06, "loss": 0.37, "step": 10260, "task_loss": 0.46407756209373474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4347486197948456, "epoch": 8.67, "learning_rate": 6.631445477599324e-06, "loss": 0.3962, "step": 10261, "task_loss": 0.3760209083557129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34073951840400696, "epoch": 8.67, "learning_rate": 6.627218934911243e-06, "loss": 0.3091, "step": 10262, "task_loss": 0.38485804200172424 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43871307373046875, "epoch": 8.67, "learning_rate": 6.622992392223162e-06, "loss": 0.3148, "step": 10263, "task_loss": 0.5221733450889587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3766127824783325, "epoch": 8.68, "learning_rate": 6.61876584953508e-06, "loss": 0.3203, "step": 10264, "task_loss": 0.6137000322341919 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5030971169471741, "epoch": 8.68, "learning_rate": 6.614539306846999e-06, "loss": 0.3669, "step": 10265, "task_loss": 0.5914580821990967 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.273930162191391, "epoch": 8.68, "learning_rate": 6.610312764158919e-06, "loss": 0.312, "step": 10266, "task_loss": 0.5968537330627441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37613046169281006, "epoch": 8.68, "learning_rate": 6.606086221470838e-06, "loss": 0.4175, "step": 10267, "task_loss": 1.101049542427063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3683258295059204, "epoch": 8.68, "learning_rate": 6.601859678782756e-06, "loss": 0.3889, "step": 10268, "task_loss": 1.4945214986801147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1301913559436798, "epoch": 8.68, "learning_rate": 6.597633136094675e-06, "loss": 0.2717, "step": 10269, "task_loss": 0.3503168821334839 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28418534994125366, "epoch": 8.68, "learning_rate": 6.5934065934065935e-06, "loss": 0.4259, "step": 10270, "task_loss": 1.119240641593933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3358052968978882, "epoch": 8.68, "learning_rate": 6.589180050718513e-06, "loss": 0.3474, "step": 10271, "task_loss": 1.6885454654693604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2359154224395752, "epoch": 8.68, "learning_rate": 6.5849535080304305e-06, "loss": 0.293, "step": 10272, "task_loss": 0.2343103289604187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41343820095062256, "epoch": 8.68, "learning_rate": 6.58072696534235e-06, "loss": 0.4545, "step": 10273, "task_loss": 1.726901650428772 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35470426082611084, "epoch": 8.68, "learning_rate": 6.576500422654269e-06, "loss": 0.3373, "step": 10274, "task_loss": 0.7061622142791748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3992096483707428, "epoch": 8.69, "learning_rate": 6.572273879966189e-06, "loss": 0.4136, "step": 10275, "task_loss": 0.9523466229438782 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4088340997695923, "epoch": 8.69, "learning_rate": 6.568047337278106e-06, "loss": 0.3615, "step": 10276, "task_loss": 0.15348869562149048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49036648869514465, "epoch": 8.69, "learning_rate": 6.563820794590026e-06, "loss": 0.4048, "step": 10277, "task_loss": 0.8995429277420044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2457422912120819, "epoch": 8.69, "learning_rate": 6.559594251901945e-06, "loss": 0.3209, "step": 10278, "task_loss": 0.1123863011598587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.608359694480896, "epoch": 8.69, "learning_rate": 6.5553677092138636e-06, "loss": 0.3883, "step": 10279, "task_loss": 0.5932134985923767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3319382071495056, "epoch": 8.69, "learning_rate": 6.551141166525782e-06, "loss": 0.415, "step": 10280, "task_loss": 1.541925311088562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40180307626724243, "epoch": 8.69, "learning_rate": 6.5469146238377005e-06, "loss": 0.3604, "step": 10281, "task_loss": 0.6386128664016724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3587697148323059, "epoch": 8.69, "learning_rate": 6.54268808114962e-06, "loss": 0.3047, "step": 10282, "task_loss": 0.2079063355922699 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4067291021347046, "epoch": 8.69, "learning_rate": 6.538461538461539e-06, "loss": 0.3765, "step": 10283, "task_loss": 1.3953568935394287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5291969776153564, "epoch": 8.69, "learning_rate": 6.534234995773457e-06, "loss": 0.328, "step": 10284, "task_loss": 0.4474439024925232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29968076944351196, "epoch": 8.69, "learning_rate": 6.530008453085376e-06, "loss": 0.3827, "step": 10285, "task_loss": 0.33003607392311096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4192046821117401, "epoch": 8.69, "learning_rate": 6.525781910397295e-06, "loss": 0.3447, "step": 10286, "task_loss": 0.18392977118492126 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5222430229187012, "epoch": 8.7, "learning_rate": 6.521555367709215e-06, "loss": 0.4146, "step": 10287, "task_loss": 0.4148479402065277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5136749744415283, "epoch": 8.7, "learning_rate": 6.517328825021134e-06, "loss": 0.4243, "step": 10288, "task_loss": 0.1982458084821701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2106117159128189, "epoch": 8.7, "learning_rate": 6.513102282333052e-06, "loss": 0.3634, "step": 10289, "task_loss": 0.547339141368866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19032999873161316, "epoch": 8.7, "learning_rate": 6.5088757396449705e-06, "loss": 0.3117, "step": 10290, "task_loss": 0.3296424448490143 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4238342344760895, "epoch": 8.7, "learning_rate": 6.50464919695689e-06, "loss": 0.4091, "step": 10291, "task_loss": 1.1447417736053467 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3265095055103302, "epoch": 8.7, "learning_rate": 6.500422654268809e-06, "loss": 0.3969, "step": 10292, "task_loss": 1.3084027767181396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2999575138092041, "epoch": 8.7, "learning_rate": 6.496196111580727e-06, "loss": 0.3973, "step": 10293, "task_loss": 0.25465816259384155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27479398250579834, "epoch": 8.7, "learning_rate": 6.491969568892646e-06, "loss": 0.6045, "step": 10294, "task_loss": 0.04663139581680298 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33985111117362976, "epoch": 8.7, "learning_rate": 6.487743026204565e-06, "loss": 0.347, "step": 10295, "task_loss": 0.055796485394239426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2140342742204666, "epoch": 8.7, "learning_rate": 6.483516483516485e-06, "loss": 0.4385, "step": 10296, "task_loss": 0.30671241879463196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18891829252243042, "epoch": 8.7, "learning_rate": 6.479289940828402e-06, "loss": 0.4616, "step": 10297, "task_loss": 0.2002723664045334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4374452829360962, "epoch": 8.7, "learning_rate": 6.475063398140322e-06, "loss": 0.3894, "step": 10298, "task_loss": 0.6016706228256226 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4171823561191559, "epoch": 8.71, "learning_rate": 6.4708368554522405e-06, "loss": 0.4266, "step": 10299, "task_loss": 0.2055545300245285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3659719228744507, "epoch": 8.71, "learning_rate": 6.4666103127641594e-06, "loss": 0.3899, "step": 10300, "task_loss": 0.32210463285446167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42170950770378113, "epoch": 8.71, "learning_rate": 6.4623837700760775e-06, "loss": 0.46, "step": 10301, "task_loss": 0.3794301748275757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4294230341911316, "epoch": 8.71, "learning_rate": 6.458157227387996e-06, "loss": 0.4792, "step": 10302, "task_loss": 0.5052094459533691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15707087516784668, "epoch": 8.71, "learning_rate": 6.453930684699916e-06, "loss": 0.372, "step": 10303, "task_loss": 0.6095816493034363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2863939702510834, "epoch": 8.71, "learning_rate": 6.449704142011835e-06, "loss": 0.3562, "step": 10304, "task_loss": 0.2628788948059082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8933894038200378, "epoch": 8.71, "learning_rate": 6.445477599323753e-06, "loss": 0.4605, "step": 10305, "task_loss": 0.8263062834739685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5210189819335938, "epoch": 8.71, "learning_rate": 6.441251056635672e-06, "loss": 0.4346, "step": 10306, "task_loss": 0.9245979189872742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31072527170181274, "epoch": 8.71, "learning_rate": 6.437024513947592e-06, "loss": 0.3562, "step": 10307, "task_loss": 0.4490794241428375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.328745573759079, "epoch": 8.71, "learning_rate": 6.4327979712595106e-06, "loss": 0.4424, "step": 10308, "task_loss": 0.3117794096469879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5559351444244385, "epoch": 8.71, "learning_rate": 6.428571428571429e-06, "loss": 0.4462, "step": 10309, "task_loss": 0.3710276484489441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2957094609737396, "epoch": 8.71, "learning_rate": 6.4243448858833475e-06, "loss": 0.3853, "step": 10310, "task_loss": 0.8715615272521973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3268918991088867, "epoch": 8.72, "learning_rate": 6.420118343195266e-06, "loss": 0.3275, "step": 10311, "task_loss": 1.5487751960754395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23484739661216736, "epoch": 8.72, "learning_rate": 6.415891800507186e-06, "loss": 0.3548, "step": 10312, "task_loss": 1.1772838830947876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23608151078224182, "epoch": 8.72, "learning_rate": 6.411665257819103e-06, "loss": 0.3252, "step": 10313, "task_loss": 0.24813468754291534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6550939083099365, "epoch": 8.72, "learning_rate": 6.407438715131023e-06, "loss": 0.4038, "step": 10314, "task_loss": 0.4283565282821655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44849255681037903, "epoch": 8.72, "learning_rate": 6.403212172442942e-06, "loss": 0.3482, "step": 10315, "task_loss": 1.8860489130020142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.460258424282074, "epoch": 8.72, "learning_rate": 6.398985629754861e-06, "loss": 0.3739, "step": 10316, "task_loss": 0.12443460524082184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1617927998304367, "epoch": 8.72, "learning_rate": 6.394759087066781e-06, "loss": 0.3285, "step": 10317, "task_loss": 0.8608511686325073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48226794600486755, "epoch": 8.72, "learning_rate": 6.390532544378698e-06, "loss": 0.4172, "step": 10318, "task_loss": 0.10933075845241547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4284980893135071, "epoch": 8.72, "learning_rate": 6.3863060016906175e-06, "loss": 0.3137, "step": 10319, "task_loss": 0.806400716304779 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3165992498397827, "epoch": 8.72, "learning_rate": 6.382079459002536e-06, "loss": 0.4303, "step": 10320, "task_loss": 0.4945444166660309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19129790365695953, "epoch": 8.72, "learning_rate": 6.377852916314455e-06, "loss": 0.4069, "step": 10321, "task_loss": 0.6050539016723633 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25220373272895813, "epoch": 8.72, "learning_rate": 6.373626373626373e-06, "loss": 0.4491, "step": 10322, "task_loss": 0.6629496216773987 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5800293684005737, "epoch": 8.73, "learning_rate": 6.369399830938293e-06, "loss": 0.4867, "step": 10323, "task_loss": 0.20879922807216644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2251797467470169, "epoch": 8.73, "learning_rate": 6.365173288250212e-06, "loss": 0.35, "step": 10324, "task_loss": 0.21417711675167084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28720808029174805, "epoch": 8.73, "learning_rate": 6.360946745562131e-06, "loss": 0.3782, "step": 10325, "task_loss": 0.2106892168521881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33009570837020874, "epoch": 8.73, "learning_rate": 6.356720202874049e-06, "loss": 0.4472, "step": 10326, "task_loss": 0.30901291966438293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32114356756210327, "epoch": 8.73, "learning_rate": 6.352493660185968e-06, "loss": 0.4017, "step": 10327, "task_loss": 1.3044452667236328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38691216707229614, "epoch": 8.73, "learning_rate": 6.3482671174978876e-06, "loss": 0.5121, "step": 10328, "task_loss": 0.8568069934844971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18394160270690918, "epoch": 8.73, "learning_rate": 6.3440405748098064e-06, "loss": 0.2875, "step": 10329, "task_loss": 0.266407310962677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3543054759502411, "epoch": 8.73, "learning_rate": 6.3398140321217245e-06, "loss": 0.3458, "step": 10330, "task_loss": 1.2154816389083862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3551786243915558, "epoch": 8.73, "learning_rate": 6.335587489433643e-06, "loss": 0.34, "step": 10331, "task_loss": 0.37037405371665955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.13606339693069458, "epoch": 8.73, "learning_rate": 6.331360946745562e-06, "loss": 0.389, "step": 10332, "task_loss": 0.3315262198448181 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.192510724067688, "epoch": 8.73, "learning_rate": 6.327134404057482e-06, "loss": 0.305, "step": 10333, "task_loss": 0.5704902410507202 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37080585956573486, "epoch": 8.73, "learning_rate": 6.322907861369399e-06, "loss": 0.444, "step": 10334, "task_loss": 0.12751515209674835 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19843831658363342, "epoch": 8.74, "learning_rate": 6.318681318681319e-06, "loss": 0.3087, "step": 10335, "task_loss": 0.05416838079690933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3216116428375244, "epoch": 8.74, "learning_rate": 6.314454775993238e-06, "loss": 0.363, "step": 10336, "task_loss": 0.8008543252944946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6660699844360352, "epoch": 8.74, "learning_rate": 6.310228233305157e-06, "loss": 0.5347, "step": 10337, "task_loss": 0.9633574485778809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37600424885749817, "epoch": 8.74, "learning_rate": 6.306001690617075e-06, "loss": 0.3272, "step": 10338, "task_loss": 0.7214406132698059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3360324203968048, "epoch": 8.74, "learning_rate": 6.3017751479289945e-06, "loss": 0.372, "step": 10339, "task_loss": 1.4750044345855713 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2529590427875519, "epoch": 8.74, "learning_rate": 6.297548605240913e-06, "loss": 0.3516, "step": 10340, "task_loss": 0.7506409883499146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3550662398338318, "epoch": 8.74, "learning_rate": 6.293322062552832e-06, "loss": 0.3156, "step": 10341, "task_loss": 0.5594898462295532 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3803847134113312, "epoch": 8.74, "learning_rate": 6.28909551986475e-06, "loss": 0.3541, "step": 10342, "task_loss": 0.5479049682617188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3628426492214203, "epoch": 8.74, "learning_rate": 6.284868977176669e-06, "loss": 0.4476, "step": 10343, "task_loss": 0.8604093194007874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46164005994796753, "epoch": 8.74, "learning_rate": 6.280642434488589e-06, "loss": 0.464, "step": 10344, "task_loss": 0.8634071350097656 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27743399143218994, "epoch": 8.74, "learning_rate": 6.276415891800508e-06, "loss": 0.3853, "step": 10345, "task_loss": 0.3662146031856537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4610726833343506, "epoch": 8.75, "learning_rate": 6.272189349112427e-06, "loss": 0.4525, "step": 10346, "task_loss": 0.4866326153278351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39394712448120117, "epoch": 8.75, "learning_rate": 6.267962806424345e-06, "loss": 0.4394, "step": 10347, "task_loss": 0.4040084183216095 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3919912874698639, "epoch": 8.75, "learning_rate": 6.263736263736264e-06, "loss": 0.3861, "step": 10348, "task_loss": 0.6110310554504395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2109462171792984, "epoch": 8.75, "learning_rate": 6.2595097210481834e-06, "loss": 0.3792, "step": 10349, "task_loss": 0.5261234641075134 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1590849906206131, "epoch": 8.75, "learning_rate": 6.255283178360102e-06, "loss": 0.3519, "step": 10350, "task_loss": 0.09601344168186188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3943822383880615, "epoch": 8.75, "learning_rate": 6.25105663567202e-06, "loss": 0.4158, "step": 10351, "task_loss": 0.5499534606933594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3036785423755646, "epoch": 8.75, "learning_rate": 6.246830092983939e-06, "loss": 0.3511, "step": 10352, "task_loss": 0.7557385563850403 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3434774577617645, "epoch": 8.75, "learning_rate": 6.242603550295858e-06, "loss": 0.3887, "step": 10353, "task_loss": 0.920377790927887 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4558567702770233, "epoch": 8.75, "learning_rate": 6.238377007607777e-06, "loss": 0.3364, "step": 10354, "task_loss": 0.7366123199462891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3226267099380493, "epoch": 8.75, "learning_rate": 6.234150464919696e-06, "loss": 0.3676, "step": 10355, "task_loss": 0.41210582852363586 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24346661567687988, "epoch": 8.75, "learning_rate": 6.229923922231615e-06, "loss": 0.4562, "step": 10356, "task_loss": 0.48821502923965454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2848057746887207, "epoch": 8.75, "learning_rate": 6.225697379543534e-06, "loss": 0.3695, "step": 10357, "task_loss": 0.4714357852935791 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.163316547870636, "epoch": 8.76, "learning_rate": 6.221470836855453e-06, "loss": 0.2855, "step": 10358, "task_loss": 0.11252571642398834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3133004307746887, "epoch": 8.76, "learning_rate": 6.2172442941673715e-06, "loss": 0.3785, "step": 10359, "task_loss": 0.7690437436103821 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5922303199768066, "epoch": 8.76, "learning_rate": 6.21301775147929e-06, "loss": 0.3852, "step": 10360, "task_loss": 0.6204506754875183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2429821938276291, "epoch": 8.76, "learning_rate": 6.208791208791209e-06, "loss": 0.4169, "step": 10361, "task_loss": 0.4288635551929474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7432807683944702, "epoch": 8.76, "learning_rate": 6.204564666103127e-06, "loss": 0.5201, "step": 10362, "task_loss": 1.7511721849441528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35916954278945923, "epoch": 8.76, "learning_rate": 6.200338123415047e-06, "loss": 0.3256, "step": 10363, "task_loss": 0.1071631908416748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2498967945575714, "epoch": 8.76, "learning_rate": 6.196111580726965e-06, "loss": 0.3135, "step": 10364, "task_loss": 0.4970898926258087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2621099352836609, "epoch": 8.76, "learning_rate": 6.191885038038885e-06, "loss": 0.4896, "step": 10365, "task_loss": 0.5285915732383728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2422584593296051, "epoch": 8.76, "learning_rate": 6.187658495350803e-06, "loss": 0.2685, "step": 10366, "task_loss": 0.34828007221221924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2433885931968689, "epoch": 8.76, "learning_rate": 6.183431952662723e-06, "loss": 0.2831, "step": 10367, "task_loss": 0.18364499509334564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23117919266223907, "epoch": 8.76, "learning_rate": 6.179205409974641e-06, "loss": 0.404, "step": 10368, "task_loss": 0.15573401749134064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3647945821285248, "epoch": 8.76, "learning_rate": 6.1749788672865596e-06, "loss": 0.4435, "step": 10369, "task_loss": 0.9030464887619019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3593463897705078, "epoch": 8.77, "learning_rate": 6.1707523245984785e-06, "loss": 0.2983, "step": 10370, "task_loss": 0.36978694796562195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21983423829078674, "epoch": 8.77, "learning_rate": 6.166525781910397e-06, "loss": 0.4046, "step": 10371, "task_loss": 0.43657055497169495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22845976054668427, "epoch": 8.77, "learning_rate": 6.162299239222316e-06, "loss": 0.3491, "step": 10372, "task_loss": 0.0687982365489006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5453740358352661, "epoch": 8.77, "learning_rate": 6.158072696534235e-06, "loss": 0.3852, "step": 10373, "task_loss": 0.9454341530799866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21767321228981018, "epoch": 8.77, "learning_rate": 6.153846153846155e-06, "loss": 0.3012, "step": 10374, "task_loss": 0.17494787275791168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49846434593200684, "epoch": 8.77, "learning_rate": 6.149619611158073e-06, "loss": 0.4199, "step": 10375, "task_loss": 0.2116880714893341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42876359820365906, "epoch": 8.77, "learning_rate": 6.145393068469992e-06, "loss": 0.3657, "step": 10376, "task_loss": 0.49407392740249634 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31646621227264404, "epoch": 8.77, "learning_rate": 6.141166525781911e-06, "loss": 0.3802, "step": 10377, "task_loss": 0.40297001600265503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2238287329673767, "epoch": 8.77, "learning_rate": 6.13693998309383e-06, "loss": 0.3519, "step": 10378, "task_loss": 0.6684016585350037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3505731225013733, "epoch": 8.77, "learning_rate": 6.1327134404057485e-06, "loss": 0.4345, "step": 10379, "task_loss": 0.5631243586540222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.349264532327652, "epoch": 8.77, "learning_rate": 6.128486897717667e-06, "loss": 0.3689, "step": 10380, "task_loss": 0.9389679431915283 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7556191682815552, "epoch": 8.77, "learning_rate": 6.124260355029586e-06, "loss": 0.3926, "step": 10381, "task_loss": 0.11405347287654877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28499191999435425, "epoch": 8.78, "learning_rate": 6.120033812341505e-06, "loss": 0.2374, "step": 10382, "task_loss": 0.32449033856391907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31979113817214966, "epoch": 8.78, "learning_rate": 6.115807269653424e-06, "loss": 0.2905, "step": 10383, "task_loss": 0.44297459721565247 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23574215173721313, "epoch": 8.78, "learning_rate": 6.111580726965343e-06, "loss": 0.3739, "step": 10384, "task_loss": 0.4286256730556488 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.10998722910881042, "epoch": 8.78, "learning_rate": 6.107354184277261e-06, "loss": 0.3008, "step": 10385, "task_loss": 0.13414372503757477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3486727476119995, "epoch": 8.78, "learning_rate": 6.103127641589181e-06, "loss": 0.4329, "step": 10386, "task_loss": 0.5354213714599609 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3342595398426056, "epoch": 8.78, "learning_rate": 6.098901098901099e-06, "loss": 0.4216, "step": 10387, "task_loss": 0.8670530319213867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21947066485881805, "epoch": 8.78, "learning_rate": 6.0946745562130185e-06, "loss": 0.3637, "step": 10388, "task_loss": 0.045155420899391174 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2350551187992096, "epoch": 8.78, "learning_rate": 6.0904480135249366e-06, "loss": 0.4289, "step": 10389, "task_loss": 0.4281821846961975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2293834537267685, "epoch": 8.78, "learning_rate": 6.086221470836856e-06, "loss": 0.4152, "step": 10390, "task_loss": 0.5405852198600769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.315999835729599, "epoch": 8.78, "learning_rate": 6.081994928148774e-06, "loss": 0.4184, "step": 10391, "task_loss": 0.5679742693901062 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.483694463968277, "epoch": 8.78, "learning_rate": 6.077768385460693e-06, "loss": 0.3786, "step": 10392, "task_loss": 0.7007626295089722 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3256436586380005, "epoch": 8.78, "learning_rate": 6.073541842772612e-06, "loss": 0.4161, "step": 10393, "task_loss": 0.24970579147338867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4905927777290344, "epoch": 8.79, "learning_rate": 6.069315300084531e-06, "loss": 0.3822, "step": 10394, "task_loss": 0.5679907202720642 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2644203305244446, "epoch": 8.79, "learning_rate": 6.06508875739645e-06, "loss": 0.284, "step": 10395, "task_loss": 0.6836908459663391 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2712416648864746, "epoch": 8.79, "learning_rate": 6.060862214708369e-06, "loss": 0.263, "step": 10396, "task_loss": 0.4341779053211212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2337079793214798, "epoch": 8.79, "learning_rate": 6.056635672020288e-06, "loss": 0.4999, "step": 10397, "task_loss": 0.25437474250793457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3711598515510559, "epoch": 8.79, "learning_rate": 6.0524091293322066e-06, "loss": 0.3757, "step": 10398, "task_loss": 0.3741176426410675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24600717425346375, "epoch": 8.79, "learning_rate": 6.0481825866441255e-06, "loss": 0.3009, "step": 10399, "task_loss": 0.30469605326652527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2980375587940216, "epoch": 8.79, "learning_rate": 6.043956043956044e-06, "loss": 0.2578, "step": 10400, "task_loss": 0.5250120162963867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2616725265979767, "epoch": 8.79, "learning_rate": 6.039729501267962e-06, "loss": 0.3155, "step": 10401, "task_loss": 0.618898332118988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43085891008377075, "epoch": 8.79, "learning_rate": 6.035502958579882e-06, "loss": 0.3464, "step": 10402, "task_loss": 0.5121468305587769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3438425660133362, "epoch": 8.79, "learning_rate": 6.0312764158918e-06, "loss": 0.3369, "step": 10403, "task_loss": 1.300254225730896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.411079466342926, "epoch": 8.79, "learning_rate": 6.02704987320372e-06, "loss": 0.3596, "step": 10404, "task_loss": 0.8620886206626892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4768844246864319, "epoch": 8.79, "learning_rate": 6.022823330515639e-06, "loss": 0.4165, "step": 10405, "task_loss": 0.711778461933136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31298163533210754, "epoch": 8.8, "learning_rate": 6.018596787827558e-06, "loss": 0.3742, "step": 10406, "task_loss": 1.1239935159683228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33938372135162354, "epoch": 8.8, "learning_rate": 6.014370245139477e-06, "loss": 0.3388, "step": 10407, "task_loss": 0.645356297492981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17186978459358215, "epoch": 8.8, "learning_rate": 6.010143702451395e-06, "loss": 0.2685, "step": 10408, "task_loss": 0.03959939628839493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4139705002307892, "epoch": 8.8, "learning_rate": 6.005917159763314e-06, "loss": 0.3043, "step": 10409, "task_loss": 0.6088593602180481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3759179711341858, "epoch": 8.8, "learning_rate": 6.0016906170752324e-06, "loss": 0.3491, "step": 10410, "task_loss": 0.13358567655086517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5208841562271118, "epoch": 8.8, "learning_rate": 5.997464074387152e-06, "loss": 0.4006, "step": 10411, "task_loss": 0.7816829085350037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38995715975761414, "epoch": 8.8, "learning_rate": 5.99323753169907e-06, "loss": 0.436, "step": 10412, "task_loss": 2.3727638721466064 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24432049691677094, "epoch": 8.8, "learning_rate": 5.98901098901099e-06, "loss": 0.4313, "step": 10413, "task_loss": 1.333655834197998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27580273151397705, "epoch": 8.8, "learning_rate": 5.984784446322908e-06, "loss": 0.294, "step": 10414, "task_loss": 0.677986741065979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29705536365509033, "epoch": 8.8, "learning_rate": 5.980557903634827e-06, "loss": 0.404, "step": 10415, "task_loss": 0.39004799723625183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2986558675765991, "epoch": 8.8, "learning_rate": 5.976331360946746e-06, "loss": 0.2969, "step": 10416, "task_loss": 0.14410589635372162 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5597624778747559, "epoch": 8.81, "learning_rate": 5.972104818258665e-06, "loss": 0.4689, "step": 10417, "task_loss": 1.8824102878570557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.406081885099411, "epoch": 8.81, "learning_rate": 5.9678782755705836e-06, "loss": 0.4155, "step": 10418, "task_loss": 0.66753089427948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.233069509267807, "epoch": 8.81, "learning_rate": 5.9636517328825025e-06, "loss": 0.2942, "step": 10419, "task_loss": 0.2811150550842285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5123084783554077, "epoch": 8.81, "learning_rate": 5.959425190194421e-06, "loss": 0.4248, "step": 10420, "task_loss": 0.651820957660675 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5521732568740845, "epoch": 8.81, "learning_rate": 5.95519864750634e-06, "loss": 0.4134, "step": 10421, "task_loss": 1.5152124166488647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17878499627113342, "epoch": 8.81, "learning_rate": 5.950972104818259e-06, "loss": 0.3302, "step": 10422, "task_loss": 0.19556304812431335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16046608984470367, "epoch": 8.81, "learning_rate": 5.946745562130178e-06, "loss": 0.3065, "step": 10423, "task_loss": 1.0488719940185547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3365938067436218, "epoch": 8.81, "learning_rate": 5.942519019442096e-06, "loss": 0.3126, "step": 10424, "task_loss": 0.34499451518058777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22020339965820312, "epoch": 8.81, "learning_rate": 5.938292476754016e-06, "loss": 0.3793, "step": 10425, "task_loss": 0.5699753761291504 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.11950044333934784, "epoch": 8.81, "learning_rate": 5.934065934065934e-06, "loss": 0.4298, "step": 10426, "task_loss": 0.35527607798576355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42416703701019287, "epoch": 8.81, "learning_rate": 5.929839391377854e-06, "loss": 0.4323, "step": 10427, "task_loss": 0.6153509020805359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20860655605793, "epoch": 8.81, "learning_rate": 5.925612848689772e-06, "loss": 0.2693, "step": 10428, "task_loss": 0.5964874625205994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3691558539867401, "epoch": 8.82, "learning_rate": 5.921386306001691e-06, "loss": 0.2642, "step": 10429, "task_loss": 0.5073607563972473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42879894375801086, "epoch": 8.82, "learning_rate": 5.917159763313609e-06, "loss": 0.3232, "step": 10430, "task_loss": 0.5207659006118774 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49050623178482056, "epoch": 8.82, "learning_rate": 5.912933220625528e-06, "loss": 0.3654, "step": 10431, "task_loss": 1.8078371286392212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4021673798561096, "epoch": 8.82, "learning_rate": 5.908706677937447e-06, "loss": 0.3943, "step": 10432, "task_loss": 0.8762192130088806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33206841349601746, "epoch": 8.82, "learning_rate": 5.904480135249366e-06, "loss": 0.4188, "step": 10433, "task_loss": 0.0793519839644432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20196223258972168, "epoch": 8.82, "learning_rate": 5.900253592561286e-06, "loss": 0.2542, "step": 10434, "task_loss": 0.05522362142801285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38283276557922363, "epoch": 8.82, "learning_rate": 5.896027049873204e-06, "loss": 0.3751, "step": 10435, "task_loss": 1.4365514516830444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2238149642944336, "epoch": 8.82, "learning_rate": 5.891800507185123e-06, "loss": 0.3915, "step": 10436, "task_loss": 0.47790297865867615 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4081907272338867, "epoch": 8.82, "learning_rate": 5.887573964497042e-06, "loss": 0.4061, "step": 10437, "task_loss": 0.8007245063781738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39941054582595825, "epoch": 8.82, "learning_rate": 5.8833474218089605e-06, "loss": 0.3278, "step": 10438, "task_loss": 0.9977150559425354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37820160388946533, "epoch": 8.82, "learning_rate": 5.8791208791208794e-06, "loss": 0.3712, "step": 10439, "task_loss": 0.6372966766357422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5911603569984436, "epoch": 8.82, "learning_rate": 5.874894336432798e-06, "loss": 0.4269, "step": 10440, "task_loss": 0.402508407831192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28881561756134033, "epoch": 8.83, "learning_rate": 5.870667793744717e-06, "loss": 0.3482, "step": 10441, "task_loss": 0.2989804446697235 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4393112063407898, "epoch": 8.83, "learning_rate": 5.866441251056636e-06, "loss": 0.384, "step": 10442, "task_loss": 0.7820533514022827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38091349601745605, "epoch": 8.83, "learning_rate": 5.862214708368555e-06, "loss": 0.2699, "step": 10443, "task_loss": 1.0663855075836182 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26595228910446167, "epoch": 8.83, "learning_rate": 5.857988165680474e-06, "loss": 0.3555, "step": 10444, "task_loss": 0.20775507390499115 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47235286235809326, "epoch": 8.83, "learning_rate": 5.853761622992393e-06, "loss": 0.4237, "step": 10445, "task_loss": 0.3829287588596344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28024032711982727, "epoch": 8.83, "learning_rate": 5.849535080304312e-06, "loss": 0.2971, "step": 10446, "task_loss": 0.5501790046691895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3127982020378113, "epoch": 8.83, "learning_rate": 5.84530853761623e-06, "loss": 0.3462, "step": 10447, "task_loss": 0.5126351714134216 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5277485847473145, "epoch": 8.83, "learning_rate": 5.8410819949281495e-06, "loss": 0.3637, "step": 10448, "task_loss": 0.5456387400627136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2662937641143799, "epoch": 8.83, "learning_rate": 5.8368554522400675e-06, "loss": 0.3779, "step": 10449, "task_loss": 1.1496986150741577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38354212045669556, "epoch": 8.83, "learning_rate": 5.832628909551987e-06, "loss": 0.4749, "step": 10450, "task_loss": 1.5018688440322876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.477462500333786, "epoch": 8.83, "learning_rate": 5.828402366863905e-06, "loss": 0.5103, "step": 10451, "task_loss": 0.9591981768608093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31725820899009705, "epoch": 8.83, "learning_rate": 5.824175824175824e-06, "loss": 0.3904, "step": 10452, "task_loss": 0.23489266633987427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47911036014556885, "epoch": 8.84, "learning_rate": 5.819949281487743e-06, "loss": 0.369, "step": 10453, "task_loss": 0.6942949295043945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38454657793045044, "epoch": 8.84, "learning_rate": 5.815722738799662e-06, "loss": 0.3301, "step": 10454, "task_loss": 0.3719600737094879 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.510131299495697, "epoch": 8.84, "learning_rate": 5.811496196111581e-06, "loss": 0.3182, "step": 10455, "task_loss": 0.7374346256256104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28108108043670654, "epoch": 8.84, "learning_rate": 5.8072696534235e-06, "loss": 0.4316, "step": 10456, "task_loss": 0.5342756509780884 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18161022663116455, "epoch": 8.84, "learning_rate": 5.803043110735419e-06, "loss": 0.2911, "step": 10457, "task_loss": 0.6893314123153687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45669710636138916, "epoch": 8.84, "learning_rate": 5.7988165680473375e-06, "loss": 0.367, "step": 10458, "task_loss": 0.7740815281867981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20296022295951843, "epoch": 8.84, "learning_rate": 5.794590025359256e-06, "loss": 0.3131, "step": 10459, "task_loss": 0.3846844434738159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40013182163238525, "epoch": 8.84, "learning_rate": 5.790363482671175e-06, "loss": 0.4496, "step": 10460, "task_loss": 1.3673583269119263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20002758502960205, "epoch": 8.84, "learning_rate": 5.786136939983094e-06, "loss": 0.2579, "step": 10461, "task_loss": 0.4183551073074341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3325389325618744, "epoch": 8.84, "learning_rate": 5.781910397295013e-06, "loss": 0.3193, "step": 10462, "task_loss": 0.5600717067718506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3233494758605957, "epoch": 8.84, "learning_rate": 5.777683854606932e-06, "loss": 0.3674, "step": 10463, "task_loss": 0.5678949356079102 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3349851965904236, "epoch": 8.84, "learning_rate": 5.773457311918851e-06, "loss": 0.4056, "step": 10464, "task_loss": 1.1104687452316284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6231229305267334, "epoch": 8.85, "learning_rate": 5.76923076923077e-06, "loss": 0.3732, "step": 10465, "task_loss": 0.6410720348358154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24400070309638977, "epoch": 8.85, "learning_rate": 5.765004226542689e-06, "loss": 0.3433, "step": 10466, "task_loss": 0.19105969369411469 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38292351365089417, "epoch": 8.85, "learning_rate": 5.7607776838546076e-06, "loss": 0.3699, "step": 10467, "task_loss": 1.0845474004745483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39904534816741943, "epoch": 8.85, "learning_rate": 5.756551141166526e-06, "loss": 0.4193, "step": 10468, "task_loss": 0.7654553651809692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4003687798976898, "epoch": 8.85, "learning_rate": 5.752324598478445e-06, "loss": 0.3593, "step": 10469, "task_loss": 0.22297053039073944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3450118601322174, "epoch": 8.85, "learning_rate": 5.748098055790363e-06, "loss": 0.3391, "step": 10470, "task_loss": 0.7097237706184387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4231759309768677, "epoch": 8.85, "learning_rate": 5.743871513102283e-06, "loss": 0.3647, "step": 10471, "task_loss": 0.28444379568099976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2966248691082001, "epoch": 8.85, "learning_rate": 5.739644970414201e-06, "loss": 0.366, "step": 10472, "task_loss": 0.8217769265174866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.433655321598053, "epoch": 8.85, "learning_rate": 5.735418427726121e-06, "loss": 0.3979, "step": 10473, "task_loss": 0.5183531045913696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35510674118995667, "epoch": 8.85, "learning_rate": 5.731191885038039e-06, "loss": 0.3705, "step": 10474, "task_loss": 0.24792934954166412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23211008310317993, "epoch": 8.85, "learning_rate": 5.726965342349958e-06, "loss": 0.2957, "step": 10475, "task_loss": 0.259744256734848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2525983154773712, "epoch": 8.85, "learning_rate": 5.722738799661877e-06, "loss": 0.3111, "step": 10476, "task_loss": 0.5207905769348145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38655757904052734, "epoch": 8.86, "learning_rate": 5.718512256973796e-06, "loss": 0.317, "step": 10477, "task_loss": 0.3512536287307739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2320643812417984, "epoch": 8.86, "learning_rate": 5.7142857142857145e-06, "loss": 0.3828, "step": 10478, "task_loss": 0.30625253915786743 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42895179986953735, "epoch": 8.86, "learning_rate": 5.710059171597633e-06, "loss": 0.3294, "step": 10479, "task_loss": 0.753816545009613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35077714920043945, "epoch": 8.86, "learning_rate": 5.705832628909552e-06, "loss": 0.3315, "step": 10480, "task_loss": 0.14668454229831696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4610116481781006, "epoch": 8.86, "learning_rate": 5.701606086221471e-06, "loss": 0.3776, "step": 10481, "task_loss": 1.0376172065734863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45168840885162354, "epoch": 8.86, "learning_rate": 5.69737954353339e-06, "loss": 0.5557, "step": 10482, "task_loss": 0.5488502979278564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5054806470870972, "epoch": 8.86, "learning_rate": 5.693153000845309e-06, "loss": 0.3596, "step": 10483, "task_loss": 0.2914064824581146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45483261346817017, "epoch": 8.86, "learning_rate": 5.688926458157227e-06, "loss": 0.3552, "step": 10484, "task_loss": 0.3156895041465759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38567960262298584, "epoch": 8.86, "learning_rate": 5.684699915469147e-06, "loss": 0.3767, "step": 10485, "task_loss": 0.5318587422370911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21512821316719055, "epoch": 8.86, "learning_rate": 5.680473372781065e-06, "loss": 0.4602, "step": 10486, "task_loss": 0.7077475786209106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3648052513599396, "epoch": 8.86, "learning_rate": 5.6762468300929845e-06, "loss": 0.3934, "step": 10487, "task_loss": 0.9601614475250244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2327917516231537, "epoch": 8.87, "learning_rate": 5.672020287404903e-06, "loss": 0.3451, "step": 10488, "task_loss": 0.7424845695495605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41599369049072266, "epoch": 8.87, "learning_rate": 5.667793744716822e-06, "loss": 0.3154, "step": 10489, "task_loss": 0.4143931567668915 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5976848006248474, "epoch": 8.87, "learning_rate": 5.66356720202874e-06, "loss": 0.4024, "step": 10490, "task_loss": 0.6936287879943848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4617854058742523, "epoch": 8.87, "learning_rate": 5.659340659340659e-06, "loss": 0.3533, "step": 10491, "task_loss": 0.5180593729019165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45112961530685425, "epoch": 8.87, "learning_rate": 5.655114116652578e-06, "loss": 0.411, "step": 10492, "task_loss": 0.8355237245559692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2177576720714569, "epoch": 8.87, "learning_rate": 5.650887573964497e-06, "loss": 0.2739, "step": 10493, "task_loss": 0.5394307374954224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4003145694732666, "epoch": 8.87, "learning_rate": 5.646661031276417e-06, "loss": 0.4188, "step": 10494, "task_loss": 1.2934889793395996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4112706184387207, "epoch": 8.87, "learning_rate": 5.642434488588335e-06, "loss": 0.4058, "step": 10495, "task_loss": 1.1841808557510376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43930792808532715, "epoch": 8.87, "learning_rate": 5.6382079459002546e-06, "loss": 0.5096, "step": 10496, "task_loss": 1.053077220916748 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3699222207069397, "epoch": 8.87, "learning_rate": 5.633981403212173e-06, "loss": 0.3921, "step": 10497, "task_loss": 0.1674131602048874 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4526301622390747, "epoch": 8.87, "learning_rate": 5.6297548605240915e-06, "loss": 0.4165, "step": 10498, "task_loss": 0.49757182598114014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31889957189559937, "epoch": 8.87, "learning_rate": 5.62552831783601e-06, "loss": 0.4172, "step": 10499, "task_loss": 0.272235244512558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38047248125076294, "epoch": 8.88, "learning_rate": 5.621301775147929e-06, "loss": 0.3946, "step": 10500, "task_loss": 1.6199826002120972 }, { "epoch": 8.88, "eval_accuracy": 0.9176237623762377, "eval_loss": 0.24891653656959534, "eval_runtime": 225.3499, "eval_samples_per_second": 112.048, "eval_steps_per_second": 0.879, "step": 10500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3256850242614746, "epoch": 8.88, "learning_rate": 5.617075232459848e-06, "loss": 0.3579, "step": 10501, "task_loss": 0.3629542291164398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34127897024154663, "epoch": 8.88, "learning_rate": 5.612848689771767e-06, "loss": 0.346, "step": 10502, "task_loss": 0.5949999690055847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33588463068008423, "epoch": 8.88, "learning_rate": 5.608622147083686e-06, "loss": 0.429, "step": 10503, "task_loss": 0.6808600425720215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43158018589019775, "epoch": 8.88, "learning_rate": 5.604395604395605e-06, "loss": 0.3416, "step": 10504, "task_loss": 0.5876849889755249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2707611322402954, "epoch": 8.88, "learning_rate": 5.600169061707524e-06, "loss": 0.3085, "step": 10505, "task_loss": 0.16990092396736145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25542157888412476, "epoch": 8.88, "learning_rate": 5.595942519019443e-06, "loss": 0.2872, "step": 10506, "task_loss": 0.294696182012558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.327543169260025, "epoch": 8.88, "learning_rate": 5.591715976331361e-06, "loss": 0.4159, "step": 10507, "task_loss": 0.454831600189209 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3070724308490753, "epoch": 8.88, "learning_rate": 5.58748943364328e-06, "loss": 0.4121, "step": 10508, "task_loss": 0.624697744846344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2310318797826767, "epoch": 8.88, "learning_rate": 5.5832628909551985e-06, "loss": 0.3904, "step": 10509, "task_loss": 0.31720322370529175 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2536173164844513, "epoch": 8.88, "learning_rate": 5.579036348267118e-06, "loss": 0.2513, "step": 10510, "task_loss": 0.24081407487392426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4361970126628876, "epoch": 8.88, "learning_rate": 5.574809805579036e-06, "loss": 0.4089, "step": 10511, "task_loss": 0.7810481190681458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32039326429367065, "epoch": 8.89, "learning_rate": 5.570583262890956e-06, "loss": 0.2511, "step": 10512, "task_loss": 0.2551576793193817 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48474329710006714, "epoch": 8.89, "learning_rate": 5.566356720202874e-06, "loss": 0.3552, "step": 10513, "task_loss": 0.8193480372428894 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1662999540567398, "epoch": 8.89, "learning_rate": 5.562130177514793e-06, "loss": 0.3201, "step": 10514, "task_loss": 0.45684829354286194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30625665187835693, "epoch": 8.89, "learning_rate": 5.557903634826712e-06, "loss": 0.4084, "step": 10515, "task_loss": 0.9072417616844177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5004543662071228, "epoch": 8.89, "learning_rate": 5.553677092138631e-06, "loss": 0.3983, "step": 10516, "task_loss": 0.5443535447120667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2521968185901642, "epoch": 8.89, "learning_rate": 5.54945054945055e-06, "loss": 0.2959, "step": 10517, "task_loss": 0.8739181756973267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34476685523986816, "epoch": 8.89, "learning_rate": 5.5452240067624685e-06, "loss": 0.3635, "step": 10518, "task_loss": 0.6010934114456177 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3205660581588745, "epoch": 8.89, "learning_rate": 5.540997464074387e-06, "loss": 0.5046, "step": 10519, "task_loss": 0.2961041033267975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5317286252975464, "epoch": 8.89, "learning_rate": 5.536770921386306e-06, "loss": 0.3987, "step": 10520, "task_loss": 1.2092161178588867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32604026794433594, "epoch": 8.89, "learning_rate": 5.532544378698225e-06, "loss": 0.4587, "step": 10521, "task_loss": 0.9963746666908264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6662105321884155, "epoch": 8.89, "learning_rate": 5.528317836010144e-06, "loss": 0.3808, "step": 10522, "task_loss": 0.844237744808197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22383232414722443, "epoch": 8.89, "learning_rate": 5.524091293322063e-06, "loss": 0.3186, "step": 10523, "task_loss": 0.5449993014335632 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32099008560180664, "epoch": 8.9, "learning_rate": 5.519864750633982e-06, "loss": 0.376, "step": 10524, "task_loss": 0.07917316257953644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32138481736183167, "epoch": 8.9, "learning_rate": 5.515638207945901e-06, "loss": 0.375, "step": 10525, "task_loss": 0.0775611475110054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27948567271232605, "epoch": 8.9, "learning_rate": 5.51141166525782e-06, "loss": 0.3175, "step": 10526, "task_loss": 0.47832366824150085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2130252569913864, "epoch": 8.9, "learning_rate": 5.5071851225697385e-06, "loss": 0.3276, "step": 10527, "task_loss": 0.43031612038612366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21758508682250977, "epoch": 8.9, "learning_rate": 5.502958579881657e-06, "loss": 0.335, "step": 10528, "task_loss": 0.815727174282074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32555532455444336, "epoch": 8.9, "learning_rate": 5.498732037193576e-06, "loss": 0.3433, "step": 10529, "task_loss": 0.4198053777217865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.412402868270874, "epoch": 8.9, "learning_rate": 5.494505494505494e-06, "loss": 0.3176, "step": 10530, "task_loss": 0.7705665230751038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38290080428123474, "epoch": 8.9, "learning_rate": 5.490278951817414e-06, "loss": 0.364, "step": 10531, "task_loss": 0.8077495694160461 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3473605513572693, "epoch": 8.9, "learning_rate": 5.486052409129332e-06, "loss": 0.3727, "step": 10532, "task_loss": 0.5337679386138916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40602636337280273, "epoch": 8.9, "learning_rate": 5.481825866441252e-06, "loss": 0.4247, "step": 10533, "task_loss": 1.3812623023986816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26739975810050964, "epoch": 8.9, "learning_rate": 5.47759932375317e-06, "loss": 0.3322, "step": 10534, "task_loss": 1.300783395767212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3053983449935913, "epoch": 8.9, "learning_rate": 5.47337278106509e-06, "loss": 0.3786, "step": 10535, "task_loss": 1.3127219676971436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2780449390411377, "epoch": 8.91, "learning_rate": 5.469146238377008e-06, "loss": 0.3548, "step": 10536, "task_loss": 0.18683917820453644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17003920674324036, "epoch": 8.91, "learning_rate": 5.4649196956889266e-06, "loss": 0.3385, "step": 10537, "task_loss": 0.190005823969841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30373498797416687, "epoch": 8.91, "learning_rate": 5.4606931530008455e-06, "loss": 0.3574, "step": 10538, "task_loss": 1.3013101816177368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2664998173713684, "epoch": 8.91, "learning_rate": 5.456466610312764e-06, "loss": 0.4277, "step": 10539, "task_loss": 0.08448263257741928 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40612179040908813, "epoch": 8.91, "learning_rate": 5.452240067624683e-06, "loss": 0.3665, "step": 10540, "task_loss": 1.2882914543151855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6719151139259338, "epoch": 8.91, "learning_rate": 5.448013524936602e-06, "loss": 0.4749, "step": 10541, "task_loss": 0.48421168327331543 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27277275919914246, "epoch": 8.91, "learning_rate": 5.443786982248521e-06, "loss": 0.3045, "step": 10542, "task_loss": 0.15740960836410522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3185015618801117, "epoch": 8.91, "learning_rate": 5.43956043956044e-06, "loss": 0.3226, "step": 10543, "task_loss": 0.7993637323379517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.13551494479179382, "epoch": 8.91, "learning_rate": 5.435333896872359e-06, "loss": 0.3003, "step": 10544, "task_loss": 0.11653448641300201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48348331451416016, "epoch": 8.91, "learning_rate": 5.431107354184278e-06, "loss": 0.378, "step": 10545, "task_loss": 0.6741025447845459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4348740577697754, "epoch": 8.91, "learning_rate": 5.426880811496196e-06, "loss": 0.4626, "step": 10546, "task_loss": 0.6881992220878601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2195262908935547, "epoch": 8.91, "learning_rate": 5.4226542688081155e-06, "loss": 0.3281, "step": 10547, "task_loss": 0.9424790740013123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34371060132980347, "epoch": 8.92, "learning_rate": 5.4184277261200335e-06, "loss": 0.3617, "step": 10548, "task_loss": 0.8610680103302002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3726586103439331, "epoch": 8.92, "learning_rate": 5.414201183431953e-06, "loss": 0.3438, "step": 10549, "task_loss": 0.08373191952705383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26128527522087097, "epoch": 8.92, "learning_rate": 5.409974640743871e-06, "loss": 0.3832, "step": 10550, "task_loss": 0.42539262771606445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29745981097221375, "epoch": 8.92, "learning_rate": 5.405748098055791e-06, "loss": 0.3173, "step": 10551, "task_loss": 0.6949764490127563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3863251209259033, "epoch": 8.92, "learning_rate": 5.40152155536771e-06, "loss": 0.3332, "step": 10552, "task_loss": 0.2945582866668701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3376665413379669, "epoch": 8.92, "learning_rate": 5.397295012679628e-06, "loss": 0.3781, "step": 10553, "task_loss": 0.2574164867401123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29187387228012085, "epoch": 8.92, "learning_rate": 5.393068469991548e-06, "loss": 0.4159, "step": 10554, "task_loss": 0.39668914675712585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43753212690353394, "epoch": 8.92, "learning_rate": 5.388841927303466e-06, "loss": 0.4158, "step": 10555, "task_loss": 0.6210050582885742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3267529606819153, "epoch": 8.92, "learning_rate": 5.3846153846153855e-06, "loss": 0.3835, "step": 10556, "task_loss": 0.8200408220291138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2754681706428528, "epoch": 8.92, "learning_rate": 5.3803888419273036e-06, "loss": 0.4152, "step": 10557, "task_loss": 0.48925378918647766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22396092116832733, "epoch": 8.92, "learning_rate": 5.3761622992392224e-06, "loss": 0.3139, "step": 10558, "task_loss": 0.5447292327880859 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3004778027534485, "epoch": 8.93, "learning_rate": 5.371935756551141e-06, "loss": 0.4364, "step": 10559, "task_loss": 0.8373518586158752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3843916356563568, "epoch": 8.93, "learning_rate": 5.36770921386306e-06, "loss": 0.361, "step": 10560, "task_loss": 1.3160759210586548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38844168186187744, "epoch": 8.93, "learning_rate": 5.363482671174979e-06, "loss": 0.3472, "step": 10561, "task_loss": 0.3519066870212555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2851048707962036, "epoch": 8.93, "learning_rate": 5.359256128486898e-06, "loss": 0.2805, "step": 10562, "task_loss": 0.27651041746139526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24855992197990417, "epoch": 8.93, "learning_rate": 5.355029585798817e-06, "loss": 0.2873, "step": 10563, "task_loss": 0.3810814917087555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4621483087539673, "epoch": 8.93, "learning_rate": 5.350803043110736e-06, "loss": 0.3358, "step": 10564, "task_loss": 0.6374263167381287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33553358912467957, "epoch": 8.93, "learning_rate": 5.346576500422655e-06, "loss": 0.4768, "step": 10565, "task_loss": 0.7039610147476196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1790844053030014, "epoch": 8.93, "learning_rate": 5.342349957734574e-06, "loss": 0.4636, "step": 10566, "task_loss": 0.5062955021858215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2807973027229309, "epoch": 8.93, "learning_rate": 5.3381234150464925e-06, "loss": 0.3999, "step": 10567, "task_loss": 1.2764369249343872 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5061820149421692, "epoch": 8.93, "learning_rate": 5.333896872358411e-06, "loss": 0.398, "step": 10568, "task_loss": 0.31959405541419983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5525521636009216, "epoch": 8.93, "learning_rate": 5.329670329670329e-06, "loss": 0.3362, "step": 10569, "task_loss": 0.7512363791465759 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3614034056663513, "epoch": 8.93, "learning_rate": 5.325443786982249e-06, "loss": 0.3506, "step": 10570, "task_loss": 0.43919432163238525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25045835971832275, "epoch": 8.94, "learning_rate": 5.321217244294167e-06, "loss": 0.2651, "step": 10571, "task_loss": 0.604154109954834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26306185126304626, "epoch": 8.94, "learning_rate": 5.316990701606087e-06, "loss": 0.3136, "step": 10572, "task_loss": 0.4441031813621521 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2923782467842102, "epoch": 8.94, "learning_rate": 5.312764158918005e-06, "loss": 0.2962, "step": 10573, "task_loss": 0.600132167339325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21817299723625183, "epoch": 8.94, "learning_rate": 5.308537616229924e-06, "loss": 0.3013, "step": 10574, "task_loss": 0.23203979432582855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49228203296661377, "epoch": 8.94, "learning_rate": 5.304311073541843e-06, "loss": 0.3477, "step": 10575, "task_loss": 1.3442047834396362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19832350313663483, "epoch": 8.94, "learning_rate": 5.300084530853762e-06, "loss": 0.3757, "step": 10576, "task_loss": 0.2660848796367645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.508938729763031, "epoch": 8.94, "learning_rate": 5.2958579881656805e-06, "loss": 0.4269, "step": 10577, "task_loss": 0.3726608455181122 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22324714064598083, "epoch": 8.94, "learning_rate": 5.2916314454775994e-06, "loss": 0.3319, "step": 10578, "task_loss": 0.6251605749130249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22403176128864288, "epoch": 8.94, "learning_rate": 5.287404902789518e-06, "loss": 0.2917, "step": 10579, "task_loss": 0.17355366051197052 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3432474434375763, "epoch": 8.94, "learning_rate": 5.283178360101437e-06, "loss": 0.3461, "step": 10580, "task_loss": 0.3043481111526489 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2583116590976715, "epoch": 8.94, "learning_rate": 5.278951817413356e-06, "loss": 0.411, "step": 10581, "task_loss": 0.6677039265632629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42057859897613525, "epoch": 8.94, "learning_rate": 5.274725274725275e-06, "loss": 0.3854, "step": 10582, "task_loss": 0.4723869860172272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4363538324832916, "epoch": 8.95, "learning_rate": 5.270498732037194e-06, "loss": 0.4118, "step": 10583, "task_loss": 0.17191146314144135 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26022660732269287, "epoch": 8.95, "learning_rate": 5.266272189349113e-06, "loss": 0.3207, "step": 10584, "task_loss": 0.6693881750106812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.252693235874176, "epoch": 8.95, "learning_rate": 5.262045646661032e-06, "loss": 0.3499, "step": 10585, "task_loss": 0.17671498656272888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29161888360977173, "epoch": 8.95, "learning_rate": 5.2578191039729506e-06, "loss": 0.4442, "step": 10586, "task_loss": 1.3715044260025024 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3111943304538727, "epoch": 8.95, "learning_rate": 5.2535925612848695e-06, "loss": 0.455, "step": 10587, "task_loss": 0.33042800426483154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3734613358974457, "epoch": 8.95, "learning_rate": 5.249366018596788e-06, "loss": 0.3033, "step": 10588, "task_loss": 0.5430118441581726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.456808865070343, "epoch": 8.95, "learning_rate": 5.245139475908707e-06, "loss": 0.342, "step": 10589, "task_loss": 0.2375822365283966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38724806904792786, "epoch": 8.95, "learning_rate": 5.240912933220625e-06, "loss": 0.3927, "step": 10590, "task_loss": 0.35092824697494507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24220506846904755, "epoch": 8.95, "learning_rate": 5.236686390532545e-06, "loss": 0.3321, "step": 10591, "task_loss": 0.19416414201259613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46470707654953003, "epoch": 8.95, "learning_rate": 5.232459847844463e-06, "loss": 0.3165, "step": 10592, "task_loss": 0.7578445672988892 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2870655059814453, "epoch": 8.95, "learning_rate": 5.228233305156383e-06, "loss": 0.292, "step": 10593, "task_loss": 0.36816442012786865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5843943953514099, "epoch": 8.95, "learning_rate": 5.224006762468301e-06, "loss": 0.4256, "step": 10594, "task_loss": 1.374983549118042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37522709369659424, "epoch": 8.96, "learning_rate": 5.219780219780221e-06, "loss": 0.345, "step": 10595, "task_loss": 1.3052809238433838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.361098051071167, "epoch": 8.96, "learning_rate": 5.215553677092139e-06, "loss": 0.341, "step": 10596, "task_loss": 0.6361895799636841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3357596695423126, "epoch": 8.96, "learning_rate": 5.2113271344040575e-06, "loss": 0.3858, "step": 10597, "task_loss": 0.7915270328521729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4455062747001648, "epoch": 8.96, "learning_rate": 5.207100591715976e-06, "loss": 0.3695, "step": 10598, "task_loss": 0.9808911085128784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27943286299705505, "epoch": 8.96, "learning_rate": 5.202874049027895e-06, "loss": 0.3396, "step": 10599, "task_loss": 0.47498053312301636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4404623806476593, "epoch": 8.96, "learning_rate": 5.198647506339814e-06, "loss": 0.3583, "step": 10600, "task_loss": 0.4487472176551819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17859220504760742, "epoch": 8.96, "learning_rate": 5.194420963651733e-06, "loss": 0.291, "step": 10601, "task_loss": 0.6823776960372925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5924414396286011, "epoch": 8.96, "learning_rate": 5.190194420963652e-06, "loss": 0.3913, "step": 10602, "task_loss": 0.9030060172080994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2751384675502777, "epoch": 8.96, "learning_rate": 5.185967878275571e-06, "loss": 0.3242, "step": 10603, "task_loss": 0.4541858732700348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.534738302230835, "epoch": 8.96, "learning_rate": 5.18174133558749e-06, "loss": 0.4127, "step": 10604, "task_loss": 0.8677483797073364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26253435015678406, "epoch": 8.96, "learning_rate": 5.177514792899409e-06, "loss": 0.3494, "step": 10605, "task_loss": 0.6880300641059875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4415886402130127, "epoch": 8.96, "learning_rate": 5.173288250211327e-06, "loss": 0.435, "step": 10606, "task_loss": 0.9131572246551514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3032299876213074, "epoch": 8.97, "learning_rate": 5.1690617075232464e-06, "loss": 0.3768, "step": 10607, "task_loss": 0.2664109766483307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3132404088973999, "epoch": 8.97, "learning_rate": 5.1648351648351645e-06, "loss": 0.368, "step": 10608, "task_loss": 0.4143713414669037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1638088971376419, "epoch": 8.97, "learning_rate": 5.160608622147084e-06, "loss": 0.2673, "step": 10609, "task_loss": 0.05413144826889038 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2843250632286072, "epoch": 8.97, "learning_rate": 5.156382079459002e-06, "loss": 0.3393, "step": 10610, "task_loss": 0.11795742064714432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2727190852165222, "epoch": 8.97, "learning_rate": 5.152155536770922e-06, "loss": 0.3207, "step": 10611, "task_loss": 0.2639559805393219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32542917132377625, "epoch": 8.97, "learning_rate": 5.147928994082841e-06, "loss": 0.3273, "step": 10612, "task_loss": 0.6337400078773499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4037979543209076, "epoch": 8.97, "learning_rate": 5.143702451394759e-06, "loss": 0.4104, "step": 10613, "task_loss": 0.6392010450363159 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.323793888092041, "epoch": 8.97, "learning_rate": 5.139475908706679e-06, "loss": 0.3123, "step": 10614, "task_loss": 0.6191710829734802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2690281569957733, "epoch": 8.97, "learning_rate": 5.135249366018597e-06, "loss": 0.3434, "step": 10615, "task_loss": 0.6663408279418945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5430330038070679, "epoch": 8.97, "learning_rate": 5.1310228233305165e-06, "loss": 0.4859, "step": 10616, "task_loss": 0.7418842315673828 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46147239208221436, "epoch": 8.97, "learning_rate": 5.1267962806424345e-06, "loss": 0.4502, "step": 10617, "task_loss": 0.5960937738418579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4016711413860321, "epoch": 8.97, "learning_rate": 5.122569737954354e-06, "loss": 0.3542, "step": 10618, "task_loss": 0.8922397494316101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29150354862213135, "epoch": 8.98, "learning_rate": 5.118343195266272e-06, "loss": 0.3051, "step": 10619, "task_loss": 0.4391981065273285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5599380731582642, "epoch": 8.98, "learning_rate": 5.114116652578191e-06, "loss": 0.3653, "step": 10620, "task_loss": 0.47943735122680664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4838765263557434, "epoch": 8.98, "learning_rate": 5.10989010989011e-06, "loss": 0.3155, "step": 10621, "task_loss": 0.4959776997566223 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2942233085632324, "epoch": 8.98, "learning_rate": 5.105663567202029e-06, "loss": 0.3763, "step": 10622, "task_loss": 0.8853696584701538 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4346213638782501, "epoch": 8.98, "learning_rate": 5.101437024513948e-06, "loss": 0.5514, "step": 10623, "task_loss": 0.8086497783660889 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4018877446651459, "epoch": 8.98, "learning_rate": 5.097210481825867e-06, "loss": 0.3526, "step": 10624, "task_loss": 0.9724776148796082 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3242490291595459, "epoch": 8.98, "learning_rate": 5.092983939137786e-06, "loss": 0.3416, "step": 10625, "task_loss": 0.22248663008213043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4244734048843384, "epoch": 8.98, "learning_rate": 5.0887573964497045e-06, "loss": 0.3521, "step": 10626, "task_loss": 0.9324353337287903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22923877835273743, "epoch": 8.98, "learning_rate": 5.0845308537616234e-06, "loss": 0.2864, "step": 10627, "task_loss": 0.24742712080478668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26313456892967224, "epoch": 8.98, "learning_rate": 5.080304311073542e-06, "loss": 0.2596, "step": 10628, "task_loss": 0.25181740522384644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23418673872947693, "epoch": 8.98, "learning_rate": 5.07607776838546e-06, "loss": 0.3234, "step": 10629, "task_loss": 0.4639100432395935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21122485399246216, "epoch": 8.99, "learning_rate": 5.07185122569738e-06, "loss": 0.3472, "step": 10630, "task_loss": 0.2071051001548767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1738552451133728, "epoch": 8.99, "learning_rate": 5.067624683009298e-06, "loss": 0.3202, "step": 10631, "task_loss": 0.3892912268638611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27671360969543457, "epoch": 8.99, "learning_rate": 5.063398140321218e-06, "loss": 0.3907, "step": 10632, "task_loss": 0.31942641735076904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3668547570705414, "epoch": 8.99, "learning_rate": 5.059171597633136e-06, "loss": 0.347, "step": 10633, "task_loss": 0.45272907614707947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23243530094623566, "epoch": 8.99, "learning_rate": 5.054945054945056e-06, "loss": 0.3694, "step": 10634, "task_loss": 0.1549949049949646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1866087019443512, "epoch": 8.99, "learning_rate": 5.050718512256974e-06, "loss": 0.3542, "step": 10635, "task_loss": 0.24225318431854248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3390258550643921, "epoch": 8.99, "learning_rate": 5.046491969568893e-06, "loss": 0.397, "step": 10636, "task_loss": 0.0911502093076706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1836473047733307, "epoch": 8.99, "learning_rate": 5.0422654268808115e-06, "loss": 0.3597, "step": 10637, "task_loss": 0.5673336386680603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5017399787902832, "epoch": 8.99, "learning_rate": 5.03803888419273e-06, "loss": 0.4054, "step": 10638, "task_loss": 0.3923577666282654 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40955376625061035, "epoch": 8.99, "learning_rate": 5.033812341504649e-06, "loss": 0.373, "step": 10639, "task_loss": 0.7415474653244019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23151738941669464, "epoch": 8.99, "learning_rate": 5.029585798816568e-06, "loss": 0.2752, "step": 10640, "task_loss": 0.3509952425956726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34489303827285767, "epoch": 8.99, "learning_rate": 5.025359256128487e-06, "loss": 0.3318, "step": 10641, "task_loss": 0.47115615010261536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32184505462646484, "epoch": 9.0, "learning_rate": 5.021132713440406e-06, "loss": 0.4878, "step": 10642, "task_loss": 0.10446401685476303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3233630061149597, "epoch": 9.0, "learning_rate": 5.016906170752325e-06, "loss": 0.4879, "step": 10643, "task_loss": 0.3864535093307495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3556531071662903, "epoch": 9.0, "learning_rate": 5.012679628064244e-06, "loss": 0.3533, "step": 10644, "task_loss": 0.591995894908905 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4322413206100464, "epoch": 9.0, "learning_rate": 5.008453085376163e-06, "loss": 0.5175, "step": 10645, "task_loss": 0.6103222370147705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43343114852905273, "epoch": 9.0, "learning_rate": 5.0042265426880815e-06, "loss": 0.3903, "step": 10646, "task_loss": 0.8915157318115234 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4003659784793854, "epoch": 9.0, "learning_rate": 5e-06, "loss": 0.4182, "step": 10647, "task_loss": 0.21519266068935394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2912815511226654, "epoch": 9.0, "learning_rate": 4.995773457311919e-06, "loss": 0.6289, "step": 10648, "task_loss": 0.20611804723739624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5121369361877441, "epoch": 9.0, "learning_rate": 4.991546914623838e-06, "loss": 0.3234, "step": 10649, "task_loss": 1.0983569622039795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5387423634529114, "epoch": 9.0, "learning_rate": 4.987320371935757e-06, "loss": 0.4862, "step": 10650, "task_loss": 0.6497392058372498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.463063508272171, "epoch": 9.0, "learning_rate": 4.983093829247676e-06, "loss": 0.4042, "step": 10651, "task_loss": 0.5647193193435669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32789114117622375, "epoch": 9.0, "learning_rate": 4.978867286559594e-06, "loss": 0.3413, "step": 10652, "task_loss": 0.18238121271133423 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4732763469219208, "epoch": 9.01, "learning_rate": 4.974640743871514e-06, "loss": 0.4078, "step": 10653, "task_loss": 0.5495404005050659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14766304194927216, "epoch": 9.01, "learning_rate": 4.970414201183432e-06, "loss": 0.3616, "step": 10654, "task_loss": 0.09680415689945221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36063235998153687, "epoch": 9.01, "learning_rate": 4.9661876584953515e-06, "loss": 0.2867, "step": 10655, "task_loss": 0.03089725971221924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4248960018157959, "epoch": 9.01, "learning_rate": 4.96196111580727e-06, "loss": 0.3523, "step": 10656, "task_loss": 0.6082748770713806 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3456171452999115, "epoch": 9.01, "learning_rate": 4.957734573119189e-06, "loss": 0.3932, "step": 10657, "task_loss": 0.5271035432815552 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38025981187820435, "epoch": 9.01, "learning_rate": 4.953508030431107e-06, "loss": 0.3571, "step": 10658, "task_loss": 0.7842199802398682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4166877865791321, "epoch": 9.01, "learning_rate": 4.949281487743026e-06, "loss": 0.418, "step": 10659, "task_loss": 0.5476058721542358 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4175650477409363, "epoch": 9.01, "learning_rate": 4.945054945054945e-06, "loss": 0.3421, "step": 10660, "task_loss": 0.261981338262558 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2540813386440277, "epoch": 9.01, "learning_rate": 4.940828402366864e-06, "loss": 0.3023, "step": 10661, "task_loss": 0.3522268235683441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29951393604278564, "epoch": 9.01, "learning_rate": 4.936601859678783e-06, "loss": 0.3398, "step": 10662, "task_loss": 0.7414291501045227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.280769944190979, "epoch": 9.01, "learning_rate": 4.932375316990702e-06, "loss": 0.3469, "step": 10663, "task_loss": 0.8019564151763916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3119147717952728, "epoch": 9.01, "learning_rate": 4.928148774302621e-06, "loss": 0.4426, "step": 10664, "task_loss": 0.06697451323270798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25725752115249634, "epoch": 9.02, "learning_rate": 4.92392223161454e-06, "loss": 0.3687, "step": 10665, "task_loss": 0.7880474328994751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22658953070640564, "epoch": 9.02, "learning_rate": 4.9196956889264585e-06, "loss": 0.2822, "step": 10666, "task_loss": 0.8439580798149109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.581402599811554, "epoch": 9.02, "learning_rate": 4.915469146238377e-06, "loss": 0.5299, "step": 10667, "task_loss": 0.35453835129737854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34796902537345886, "epoch": 9.02, "learning_rate": 4.9112426035502954e-06, "loss": 0.335, "step": 10668, "task_loss": 0.6323029398918152 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31796878576278687, "epoch": 9.02, "learning_rate": 4.907016060862215e-06, "loss": 0.457, "step": 10669, "task_loss": 0.7333263158798218 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5624836087226868, "epoch": 9.02, "learning_rate": 4.902789518174133e-06, "loss": 0.3591, "step": 10670, "task_loss": 0.7293015718460083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34871602058410645, "epoch": 9.02, "learning_rate": 4.898562975486053e-06, "loss": 0.3605, "step": 10671, "task_loss": 0.4668397307395935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36086493730545044, "epoch": 9.02, "learning_rate": 4.894336432797972e-06, "loss": 0.3219, "step": 10672, "task_loss": 0.6964598894119263 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3666614592075348, "epoch": 9.02, "learning_rate": 4.89010989010989e-06, "loss": 0.3484, "step": 10673, "task_loss": 0.6715587973594666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27762657403945923, "epoch": 9.02, "learning_rate": 4.88588334742181e-06, "loss": 0.345, "step": 10674, "task_loss": 0.7563305497169495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27642008662223816, "epoch": 9.02, "learning_rate": 4.881656804733728e-06, "loss": 0.2783, "step": 10675, "task_loss": 0.6613790988922119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5069283843040466, "epoch": 9.02, "learning_rate": 4.877430262045647e-06, "loss": 0.3958, "step": 10676, "task_loss": 0.3895402252674103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28122851252555847, "epoch": 9.03, "learning_rate": 4.8732037193575655e-06, "loss": 0.2782, "step": 10677, "task_loss": 0.2643376290798187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49160489439964294, "epoch": 9.03, "learning_rate": 4.868977176669485e-06, "loss": 0.3899, "step": 10678, "task_loss": 1.1128556728363037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41654402017593384, "epoch": 9.03, "learning_rate": 4.864750633981403e-06, "loss": 0.3316, "step": 10679, "task_loss": 0.4175534248352051 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3051929473876953, "epoch": 9.03, "learning_rate": 4.860524091293322e-06, "loss": 0.3463, "step": 10680, "task_loss": 0.9455059766769409 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43045341968536377, "epoch": 9.03, "learning_rate": 4.856297548605241e-06, "loss": 0.3645, "step": 10681, "task_loss": 0.6507662534713745 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47211459279060364, "epoch": 9.03, "learning_rate": 4.85207100591716e-06, "loss": 0.4446, "step": 10682, "task_loss": 0.988862931728363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20833662152290344, "epoch": 9.03, "learning_rate": 4.847844463229079e-06, "loss": 0.3528, "step": 10683, "task_loss": 1.0985119342803955 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35033586621284485, "epoch": 9.03, "learning_rate": 4.843617920540998e-06, "loss": 0.3259, "step": 10684, "task_loss": 0.6822803020477295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28949639201164246, "epoch": 9.03, "learning_rate": 4.839391377852917e-06, "loss": 0.3673, "step": 10685, "task_loss": 0.2227640151977539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31784504652023315, "epoch": 9.03, "learning_rate": 4.8351648351648355e-06, "loss": 0.3665, "step": 10686, "task_loss": 1.166419506072998 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4475977122783661, "epoch": 9.03, "learning_rate": 4.830938292476754e-06, "loss": 0.4198, "step": 10687, "task_loss": 0.18233591318130493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3862488269805908, "epoch": 9.03, "learning_rate": 4.826711749788673e-06, "loss": 0.4287, "step": 10688, "task_loss": 0.83706134557724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.261260986328125, "epoch": 9.04, "learning_rate": 4.822485207100591e-06, "loss": 0.35, "step": 10689, "task_loss": 0.20080877840518951 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35010775923728943, "epoch": 9.04, "learning_rate": 4.818258664412511e-06, "loss": 0.3058, "step": 10690, "task_loss": 0.38369935750961304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45298463106155396, "epoch": 9.04, "learning_rate": 4.814032121724429e-06, "loss": 0.4095, "step": 10691, "task_loss": 0.8317689299583435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46889349818229675, "epoch": 9.04, "learning_rate": 4.809805579036349e-06, "loss": 0.3966, "step": 10692, "task_loss": 0.9253939390182495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2578296363353729, "epoch": 9.04, "learning_rate": 4.805579036348267e-06, "loss": 0.4157, "step": 10693, "task_loss": 0.5661696195602417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20081967115402222, "epoch": 9.04, "learning_rate": 4.801352493660187e-06, "loss": 0.397, "step": 10694, "task_loss": 0.5611146688461304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.491626501083374, "epoch": 9.04, "learning_rate": 4.797125950972105e-06, "loss": 0.4398, "step": 10695, "task_loss": 0.5792623162269592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19662265479564667, "epoch": 9.04, "learning_rate": 4.7928994082840236e-06, "loss": 0.2359, "step": 10696, "task_loss": 0.2238200604915619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14374740421772003, "epoch": 9.04, "learning_rate": 4.7886728655959424e-06, "loss": 0.2979, "step": 10697, "task_loss": 0.1255718171596527 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30877968668937683, "epoch": 9.04, "learning_rate": 4.784446322907861e-06, "loss": 0.4171, "step": 10698, "task_loss": 0.3169753849506378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3452456593513489, "epoch": 9.04, "learning_rate": 4.78021978021978e-06, "loss": 0.3369, "step": 10699, "task_loss": 1.0453174114227295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2903060019016266, "epoch": 9.04, "learning_rate": 4.775993237531699e-06, "loss": 0.3928, "step": 10700, "task_loss": 0.5046507120132446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23889023065567017, "epoch": 9.05, "learning_rate": 4.771766694843619e-06, "loss": 0.3848, "step": 10701, "task_loss": 0.055103663355112076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5400428771972656, "epoch": 9.05, "learning_rate": 4.767540152155537e-06, "loss": 0.3857, "step": 10702, "task_loss": 0.7502389550209045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2559239864349365, "epoch": 9.05, "learning_rate": 4.763313609467456e-06, "loss": 0.3334, "step": 10703, "task_loss": 0.38283246755599976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3501482307910919, "epoch": 9.05, "learning_rate": 4.759087066779375e-06, "loss": 0.4123, "step": 10704, "task_loss": 1.1107497215270996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3360772430896759, "epoch": 9.05, "learning_rate": 4.754860524091294e-06, "loss": 0.3065, "step": 10705, "task_loss": 0.1607997566461563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15610381960868835, "epoch": 9.05, "learning_rate": 4.7506339814032125e-06, "loss": 0.3427, "step": 10706, "task_loss": 0.2609812021255493 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2942715883255005, "epoch": 9.05, "learning_rate": 4.746407438715131e-06, "loss": 0.4206, "step": 10707, "task_loss": 0.6877492070198059 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.229124516248703, "epoch": 9.05, "learning_rate": 4.74218089602705e-06, "loss": 0.4248, "step": 10708, "task_loss": 0.931330680847168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2846606969833374, "epoch": 9.05, "learning_rate": 4.737954353338969e-06, "loss": 0.3611, "step": 10709, "task_loss": 0.6238901615142822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5292601585388184, "epoch": 9.05, "learning_rate": 4.733727810650888e-06, "loss": 0.477, "step": 10710, "task_loss": 0.5242747664451599 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25060832500457764, "epoch": 9.05, "learning_rate": 4.729501267962807e-06, "loss": 0.3025, "step": 10711, "task_loss": 0.5326293110847473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3296988606452942, "epoch": 9.05, "learning_rate": 4.725274725274725e-06, "loss": 0.3033, "step": 10712, "task_loss": 0.3291831314563751 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2981824278831482, "epoch": 9.06, "learning_rate": 4.721048182586645e-06, "loss": 0.4077, "step": 10713, "task_loss": 0.39480432868003845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4494365155696869, "epoch": 9.06, "learning_rate": 4.716821639898563e-06, "loss": 0.3906, "step": 10714, "task_loss": 1.0682185888290405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5336811542510986, "epoch": 9.06, "learning_rate": 4.7125950972104825e-06, "loss": 0.343, "step": 10715, "task_loss": 0.46252161264419556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2937215268611908, "epoch": 9.06, "learning_rate": 4.7083685545224005e-06, "loss": 0.3373, "step": 10716, "task_loss": 0.7945277690887451 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2907503545284271, "epoch": 9.06, "learning_rate": 4.70414201183432e-06, "loss": 0.3886, "step": 10717, "task_loss": 0.5997141599655151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26890385150909424, "epoch": 9.06, "learning_rate": 4.699915469146238e-06, "loss": 0.3577, "step": 10718, "task_loss": 0.7894372940063477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39001747965812683, "epoch": 9.06, "learning_rate": 4.695688926458157e-06, "loss": 0.3316, "step": 10719, "task_loss": 0.1928696632385254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32406699657440186, "epoch": 9.06, "learning_rate": 4.691462383770076e-06, "loss": 0.2641, "step": 10720, "task_loss": 0.31352487206459045 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2600927948951721, "epoch": 9.06, "learning_rate": 4.687235841081995e-06, "loss": 0.3162, "step": 10721, "task_loss": 0.42545148730278015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3026631474494934, "epoch": 9.06, "learning_rate": 4.683009298393914e-06, "loss": 0.3172, "step": 10722, "task_loss": 0.15800407528877258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2899171710014343, "epoch": 9.06, "learning_rate": 4.678782755705833e-06, "loss": 0.367, "step": 10723, "task_loss": 0.1556074321269989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2743762731552124, "epoch": 9.07, "learning_rate": 4.674556213017752e-06, "loss": 0.4027, "step": 10724, "task_loss": 0.45050162076950073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4501574635505676, "epoch": 9.07, "learning_rate": 4.6703296703296706e-06, "loss": 0.4629, "step": 10725, "task_loss": 0.8416013717651367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3481593728065491, "epoch": 9.07, "learning_rate": 4.6661031276415895e-06, "loss": 0.3863, "step": 10726, "task_loss": 0.059805918484926224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5137248039245605, "epoch": 9.07, "learning_rate": 4.661876584953508e-06, "loss": 0.4973, "step": 10727, "task_loss": 1.0285333395004272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31665855646133423, "epoch": 9.07, "learning_rate": 4.657650042265426e-06, "loss": 0.3929, "step": 10728, "task_loss": 1.3786296844482422 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2799319624900818, "epoch": 9.07, "learning_rate": 4.653423499577346e-06, "loss": 0.4027, "step": 10729, "task_loss": 1.0779032707214355 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3463509976863861, "epoch": 9.07, "learning_rate": 4.649196956889264e-06, "loss": 0.3102, "step": 10730, "task_loss": 1.2674719095230103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3920883536338806, "epoch": 9.07, "learning_rate": 4.644970414201184e-06, "loss": 0.3149, "step": 10731, "task_loss": 0.42668280005455017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26851990818977356, "epoch": 9.07, "learning_rate": 4.640743871513103e-06, "loss": 0.4617, "step": 10732, "task_loss": 0.7153639793395996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5444235801696777, "epoch": 9.07, "learning_rate": 4.636517328825022e-06, "loss": 0.393, "step": 10733, "task_loss": 1.545829176902771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38023605942726135, "epoch": 9.07, "learning_rate": 4.632290786136941e-06, "loss": 0.3651, "step": 10734, "task_loss": 0.7259161472320557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23945069313049316, "epoch": 9.07, "learning_rate": 4.628064243448859e-06, "loss": 0.2796, "step": 10735, "task_loss": 0.5746563076972961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5219240784645081, "epoch": 9.08, "learning_rate": 4.623837700760778e-06, "loss": 0.3858, "step": 10736, "task_loss": 0.5761024951934814 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1435006558895111, "epoch": 9.08, "learning_rate": 4.619611158072696e-06, "loss": 0.3705, "step": 10737, "task_loss": 0.3132264018058777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34755784273147583, "epoch": 9.08, "learning_rate": 4.615384615384616e-06, "loss": 0.3582, "step": 10738, "task_loss": 0.6461994051933289 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3404172360897064, "epoch": 9.08, "learning_rate": 4.611158072696534e-06, "loss": 0.4017, "step": 10739, "task_loss": 0.5304747819900513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1973743438720703, "epoch": 9.08, "learning_rate": 4.606931530008454e-06, "loss": 0.2858, "step": 10740, "task_loss": 0.7629637122154236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3983118236064911, "epoch": 9.08, "learning_rate": 4.602704987320372e-06, "loss": 0.5235, "step": 10741, "task_loss": 0.8805110454559326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3310662508010864, "epoch": 9.08, "learning_rate": 4.598478444632291e-06, "loss": 0.369, "step": 10742, "task_loss": 0.5598223805427551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46633660793304443, "epoch": 9.08, "learning_rate": 4.59425190194421e-06, "loss": 0.3477, "step": 10743, "task_loss": 0.6799585819244385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42530912160873413, "epoch": 9.08, "learning_rate": 4.590025359256129e-06, "loss": 0.3234, "step": 10744, "task_loss": 0.3565773367881775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31919875741004944, "epoch": 9.08, "learning_rate": 4.5857988165680475e-06, "loss": 0.3198, "step": 10745, "task_loss": 0.47649940848350525 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22381871938705444, "epoch": 9.08, "learning_rate": 4.5815722738799664e-06, "loss": 0.3088, "step": 10746, "task_loss": 0.16297003626823425 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4028918147087097, "epoch": 9.08, "learning_rate": 4.577345731191885e-06, "loss": 0.3986, "step": 10747, "task_loss": 1.0495113134384155 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21191802620887756, "epoch": 9.09, "learning_rate": 4.573119188503804e-06, "loss": 0.3383, "step": 10748, "task_loss": 0.21829430758953094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4434305429458618, "epoch": 9.09, "learning_rate": 4.568892645815723e-06, "loss": 0.4155, "step": 10749, "task_loss": 1.6755656003952026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3997635543346405, "epoch": 9.09, "learning_rate": 4.564666103127642e-06, "loss": 0.326, "step": 10750, "task_loss": 0.22782394289970398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2671975791454315, "epoch": 9.09, "learning_rate": 4.56043956043956e-06, "loss": 0.4, "step": 10751, "task_loss": 0.3082874119281769 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20449531078338623, "epoch": 9.09, "learning_rate": 4.55621301775148e-06, "loss": 0.3801, "step": 10752, "task_loss": 0.5542439222335815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23802286386489868, "epoch": 9.09, "learning_rate": 4.551986475063398e-06, "loss": 0.354, "step": 10753, "task_loss": 0.36673253774642944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2485799491405487, "epoch": 9.09, "learning_rate": 4.5477599323753176e-06, "loss": 0.3071, "step": 10754, "task_loss": 0.6225672364234924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46099853515625, "epoch": 9.09, "learning_rate": 4.543533389687236e-06, "loss": 0.4224, "step": 10755, "task_loss": 0.7647070288658142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2656293213367462, "epoch": 9.09, "learning_rate": 4.539306846999155e-06, "loss": 0.3815, "step": 10756, "task_loss": 0.534451425075531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3697848320007324, "epoch": 9.09, "learning_rate": 4.535080304311073e-06, "loss": 0.385, "step": 10757, "task_loss": 0.804455041885376 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3053297996520996, "epoch": 9.09, "learning_rate": 4.530853761622992e-06, "loss": 0.3177, "step": 10758, "task_loss": 1.5795748233795166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.222305029630661, "epoch": 9.09, "learning_rate": 4.526627218934911e-06, "loss": 0.3634, "step": 10759, "task_loss": 0.5249326229095459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4668792486190796, "epoch": 9.1, "learning_rate": 4.52240067624683e-06, "loss": 0.3531, "step": 10760, "task_loss": 0.5021543502807617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2608847916126251, "epoch": 9.1, "learning_rate": 4.51817413355875e-06, "loss": 0.3592, "step": 10761, "task_loss": 0.014046192169189453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22430363297462463, "epoch": 9.1, "learning_rate": 4.513947590870668e-06, "loss": 0.2756, "step": 10762, "task_loss": 0.2467626929283142 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2924463748931885, "epoch": 9.1, "learning_rate": 4.509721048182587e-06, "loss": 0.3585, "step": 10763, "task_loss": 0.2170679122209549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5852993726730347, "epoch": 9.1, "learning_rate": 4.505494505494506e-06, "loss": 0.378, "step": 10764, "task_loss": 0.2560487687587738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27196985483169556, "epoch": 9.1, "learning_rate": 4.5012679628064245e-06, "loss": 0.383, "step": 10765, "task_loss": 0.8842899799346924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37693604826927185, "epoch": 9.1, "learning_rate": 4.497041420118343e-06, "loss": 0.415, "step": 10766, "task_loss": 0.18818359076976776 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39774179458618164, "epoch": 9.1, "learning_rate": 4.492814877430262e-06, "loss": 0.4105, "step": 10767, "task_loss": 1.410965085029602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27049389481544495, "epoch": 9.1, "learning_rate": 4.488588334742181e-06, "loss": 0.4349, "step": 10768, "task_loss": 0.6354483962059021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.347796767950058, "epoch": 9.1, "learning_rate": 4.4843617920541e-06, "loss": 0.3914, "step": 10769, "task_loss": 0.4951612055301666 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2434314489364624, "epoch": 9.1, "learning_rate": 4.480135249366019e-06, "loss": 0.3397, "step": 10770, "task_loss": 0.061899181455373764 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19929280877113342, "epoch": 9.1, "learning_rate": 4.475908706677938e-06, "loss": 0.3087, "step": 10771, "task_loss": 0.3122748136520386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35466569662094116, "epoch": 9.11, "learning_rate": 4.471682163989857e-06, "loss": 0.369, "step": 10772, "task_loss": 0.11722215265035629 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4237186014652252, "epoch": 9.11, "learning_rate": 4.467455621301776e-06, "loss": 0.4373, "step": 10773, "task_loss": 1.364401936531067 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5618196725845337, "epoch": 9.11, "learning_rate": 4.463229078613694e-06, "loss": 0.3597, "step": 10774, "task_loss": 0.7290211915969849 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4054659605026245, "epoch": 9.11, "learning_rate": 4.4590025359256134e-06, "loss": 0.4968, "step": 10775, "task_loss": 0.7176336050033569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4422805905342102, "epoch": 9.11, "learning_rate": 4.4547759932375315e-06, "loss": 0.3071, "step": 10776, "task_loss": 0.10663993656635284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5104308128356934, "epoch": 9.11, "learning_rate": 4.450549450549451e-06, "loss": 0.3749, "step": 10777, "task_loss": 1.015378713607788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5120430588722229, "epoch": 9.11, "learning_rate": 4.446322907861369e-06, "loss": 0.5268, "step": 10778, "task_loss": 0.28718680143356323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5109079480171204, "epoch": 9.11, "learning_rate": 4.442096365173288e-06, "loss": 0.423, "step": 10779, "task_loss": 0.2126019448041916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1872909963130951, "epoch": 9.11, "learning_rate": 4.437869822485207e-06, "loss": 0.3409, "step": 10780, "task_loss": 0.8963184356689453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.401829332113266, "epoch": 9.11, "learning_rate": 4.433643279797126e-06, "loss": 0.3096, "step": 10781, "task_loss": 0.4105207622051239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23689891397953033, "epoch": 9.11, "learning_rate": 4.429416737109045e-06, "loss": 0.2445, "step": 10782, "task_loss": 0.8406711220741272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.289004921913147, "epoch": 9.11, "learning_rate": 4.425190194420964e-06, "loss": 0.3941, "step": 10783, "task_loss": 0.5913785099983215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42233243584632874, "epoch": 9.12, "learning_rate": 4.420963651732883e-06, "loss": 0.4009, "step": 10784, "task_loss": 0.19679613411426544 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3874870538711548, "epoch": 9.12, "learning_rate": 4.4167371090448015e-06, "loss": 0.3574, "step": 10785, "task_loss": 1.3830676078796387 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3288381099700928, "epoch": 9.12, "learning_rate": 4.41251056635672e-06, "loss": 0.4197, "step": 10786, "task_loss": 0.43626508116722107 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5049364566802979, "epoch": 9.12, "learning_rate": 4.408284023668639e-06, "loss": 0.3566, "step": 10787, "task_loss": 1.0110511779785156 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2575474977493286, "epoch": 9.12, "learning_rate": 4.404057480980558e-06, "loss": 0.373, "step": 10788, "task_loss": 0.2532555162906647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3124488890171051, "epoch": 9.12, "learning_rate": 4.399830938292477e-06, "loss": 0.4488, "step": 10789, "task_loss": 0.5693467259407043 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25977104902267456, "epoch": 9.12, "learning_rate": 4.395604395604396e-06, "loss": 0.317, "step": 10790, "task_loss": 0.5720422267913818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29429638385772705, "epoch": 9.12, "learning_rate": 4.391377852916315e-06, "loss": 0.3396, "step": 10791, "task_loss": 0.5651503801345825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1949886828660965, "epoch": 9.12, "learning_rate": 4.387151310228234e-06, "loss": 0.2594, "step": 10792, "task_loss": 0.9995611906051636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.453154981136322, "epoch": 9.12, "learning_rate": 4.382924767540153e-06, "loss": 0.5599, "step": 10793, "task_loss": 0.697390615940094 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.410897433757782, "epoch": 9.12, "learning_rate": 4.3786982248520715e-06, "loss": 0.5007, "step": 10794, "task_loss": 0.3754793107509613 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24273744225502014, "epoch": 9.13, "learning_rate": 4.37447168216399e-06, "loss": 0.2748, "step": 10795, "task_loss": 0.3099212050437927 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46858909726142883, "epoch": 9.13, "learning_rate": 4.370245139475909e-06, "loss": 0.442, "step": 10796, "task_loss": 0.5783044099807739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23016443848609924, "epoch": 9.13, "learning_rate": 4.366018596787827e-06, "loss": 0.3046, "step": 10797, "task_loss": 0.11538434773683548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27203109860420227, "epoch": 9.13, "learning_rate": 4.361792054099747e-06, "loss": 0.3768, "step": 10798, "task_loss": 0.19954493641853333 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3558979034423828, "epoch": 9.13, "learning_rate": 4.357565511411665e-06, "loss": 0.3013, "step": 10799, "task_loss": 0.2688828408718109 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4067249000072479, "epoch": 9.13, "learning_rate": 4.353338968723585e-06, "loss": 0.4609, "step": 10800, "task_loss": 0.668369710445404 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2674903869628906, "epoch": 9.13, "learning_rate": 4.349112426035503e-06, "loss": 0.4023, "step": 10801, "task_loss": 0.5800718665122986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35724711418151855, "epoch": 9.13, "learning_rate": 4.344885883347422e-06, "loss": 0.3171, "step": 10802, "task_loss": 0.3377666175365448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.349151611328125, "epoch": 9.13, "learning_rate": 4.340659340659341e-06, "loss": 0.4415, "step": 10803, "task_loss": 1.025716781616211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3418132960796356, "epoch": 9.13, "learning_rate": 4.33643279797126e-06, "loss": 0.3339, "step": 10804, "task_loss": 0.8026849031448364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34848421812057495, "epoch": 9.13, "learning_rate": 4.3322062552831785e-06, "loss": 0.3832, "step": 10805, "task_loss": 0.3262981176376343 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14944732189178467, "epoch": 9.13, "learning_rate": 4.327979712595097e-06, "loss": 0.2701, "step": 10806, "task_loss": 0.06320273876190186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1887546330690384, "epoch": 9.14, "learning_rate": 4.323753169907016e-06, "loss": 0.2458, "step": 10807, "task_loss": 0.1402512639760971 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2622518837451935, "epoch": 9.14, "learning_rate": 4.319526627218935e-06, "loss": 0.2813, "step": 10808, "task_loss": 0.18590986728668213 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49820035696029663, "epoch": 9.14, "learning_rate": 4.315300084530854e-06, "loss": 0.299, "step": 10809, "task_loss": 0.6286423206329346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23891744017601013, "epoch": 9.14, "learning_rate": 4.311073541842773e-06, "loss": 0.3337, "step": 10810, "task_loss": 0.37884652614593506 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3054962754249573, "epoch": 9.14, "learning_rate": 4.306846999154691e-06, "loss": 0.3747, "step": 10811, "task_loss": 0.19021984934806824 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19608978927135468, "epoch": 9.14, "learning_rate": 4.302620456466611e-06, "loss": 0.3385, "step": 10812, "task_loss": 0.13306094706058502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5409775972366333, "epoch": 9.14, "learning_rate": 4.298393913778529e-06, "loss": 0.3888, "step": 10813, "task_loss": 0.7704942226409912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48843520879745483, "epoch": 9.14, "learning_rate": 4.2941673710904485e-06, "loss": 0.4052, "step": 10814, "task_loss": 0.6687980890274048 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21563029289245605, "epoch": 9.14, "learning_rate": 4.2899408284023666e-06, "loss": 0.3536, "step": 10815, "task_loss": 0.4911973476409912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2460443526506424, "epoch": 9.14, "learning_rate": 4.285714285714286e-06, "loss": 0.3092, "step": 10816, "task_loss": 0.2874479591846466 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45760709047317505, "epoch": 9.14, "learning_rate": 4.281487743026204e-06, "loss": 0.4711, "step": 10817, "task_loss": 0.44065314531326294 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4063870906829834, "epoch": 9.14, "learning_rate": 4.277261200338123e-06, "loss": 0.4091, "step": 10818, "task_loss": 0.7656653523445129 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16463279724121094, "epoch": 9.15, "learning_rate": 4.273034657650042e-06, "loss": 0.2469, "step": 10819, "task_loss": 0.16565841436386108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3141283094882965, "epoch": 9.15, "learning_rate": 4.268808114961961e-06, "loss": 0.3301, "step": 10820, "task_loss": 0.32154935598373413 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2942084074020386, "epoch": 9.15, "learning_rate": 4.264581572273881e-06, "loss": 0.4, "step": 10821, "task_loss": 0.3492378294467926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5911177396774292, "epoch": 9.15, "learning_rate": 4.260355029585799e-06, "loss": 0.3982, "step": 10822, "task_loss": 0.5370573401451111 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3472902178764343, "epoch": 9.15, "learning_rate": 4.2561284868977185e-06, "loss": 0.3437, "step": 10823, "task_loss": 0.04951505735516548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3588460683822632, "epoch": 9.15, "learning_rate": 4.251901944209637e-06, "loss": 0.3307, "step": 10824, "task_loss": 0.24767516553401947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39019525051116943, "epoch": 9.15, "learning_rate": 4.2476754015215555e-06, "loss": 0.431, "step": 10825, "task_loss": 0.35043588280677795 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2820150852203369, "epoch": 9.15, "learning_rate": 4.243448858833474e-06, "loss": 0.313, "step": 10826, "task_loss": 0.5107693076133728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2387271523475647, "epoch": 9.15, "learning_rate": 4.239222316145393e-06, "loss": 0.4, "step": 10827, "task_loss": 1.157117486000061 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2085588276386261, "epoch": 9.15, "learning_rate": 4.234995773457312e-06, "loss": 0.3592, "step": 10828, "task_loss": 0.48905205726623535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6823012828826904, "epoch": 9.15, "learning_rate": 4.230769230769231e-06, "loss": 0.4801, "step": 10829, "task_loss": 0.46268802881240845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20702511072158813, "epoch": 9.15, "learning_rate": 4.22654268808115e-06, "loss": 0.2952, "step": 10830, "task_loss": 0.8787353038787842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.329217791557312, "epoch": 9.16, "learning_rate": 4.222316145393069e-06, "loss": 0.4165, "step": 10831, "task_loss": 0.5806611180305481 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49163514375686646, "epoch": 9.16, "learning_rate": 4.218089602704988e-06, "loss": 0.428, "step": 10832, "task_loss": 0.40630850195884705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30569207668304443, "epoch": 9.16, "learning_rate": 4.213863060016907e-06, "loss": 0.3955, "step": 10833, "task_loss": 0.4833647608757019 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32855385541915894, "epoch": 9.16, "learning_rate": 4.209636517328825e-06, "loss": 0.3755, "step": 10834, "task_loss": 0.917308509349823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38809335231781006, "epoch": 9.16, "learning_rate": 4.205409974640744e-06, "loss": 0.3976, "step": 10835, "task_loss": 0.5272912979125977 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2863474190235138, "epoch": 9.16, "learning_rate": 4.2011834319526624e-06, "loss": 0.3453, "step": 10836, "task_loss": 0.7223553657531738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44409701228141785, "epoch": 9.16, "learning_rate": 4.196956889264582e-06, "loss": 0.3974, "step": 10837, "task_loss": 0.6640463471412659 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24882397055625916, "epoch": 9.16, "learning_rate": 4.1927303465765e-06, "loss": 0.4732, "step": 10838, "task_loss": 0.4024899899959564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3268924355506897, "epoch": 9.16, "learning_rate": 4.18850380388842e-06, "loss": 0.348, "step": 10839, "task_loss": 0.2844221591949463 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3103012442588806, "epoch": 9.16, "learning_rate": 4.184277261200338e-06, "loss": 0.3963, "step": 10840, "task_loss": 0.6189307570457458 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3232678472995758, "epoch": 9.16, "learning_rate": 4.180050718512257e-06, "loss": 0.417, "step": 10841, "task_loss": 0.6324960589408875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2506742477416992, "epoch": 9.16, "learning_rate": 4.175824175824176e-06, "loss": 0.3506, "step": 10842, "task_loss": 0.44172731041908264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.279115229845047, "epoch": 9.17, "learning_rate": 4.171597633136095e-06, "loss": 0.3087, "step": 10843, "task_loss": 1.0566439628601074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23468446731567383, "epoch": 9.17, "learning_rate": 4.167371090448014e-06, "loss": 0.3883, "step": 10844, "task_loss": 0.24102312326431274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23641732335090637, "epoch": 9.17, "learning_rate": 4.1631445477599325e-06, "loss": 0.3168, "step": 10845, "task_loss": 0.23073643445968628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2851032018661499, "epoch": 9.17, "learning_rate": 4.158918005071851e-06, "loss": 0.3434, "step": 10846, "task_loss": 0.6747862696647644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.493721604347229, "epoch": 9.17, "learning_rate": 4.15469146238377e-06, "loss": 0.3222, "step": 10847, "task_loss": 0.8560011386871338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36523839831352234, "epoch": 9.17, "learning_rate": 4.150464919695689e-06, "loss": 0.3498, "step": 10848, "task_loss": 0.5505246520042419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49917393922805786, "epoch": 9.17, "learning_rate": 4.146238377007608e-06, "loss": 0.4057, "step": 10849, "task_loss": 0.5482868552207947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1424364447593689, "epoch": 9.17, "learning_rate": 4.142011834319527e-06, "loss": 0.3282, "step": 10850, "task_loss": 0.5905613899230957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33855384588241577, "epoch": 9.17, "learning_rate": 4.137785291631446e-06, "loss": 0.4336, "step": 10851, "task_loss": 0.5299764275550842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2510131597518921, "epoch": 9.17, "learning_rate": 4.133558748943365e-06, "loss": 0.2721, "step": 10852, "task_loss": 0.905805230140686 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2865440845489502, "epoch": 9.17, "learning_rate": 4.129332206255284e-06, "loss": 0.4205, "step": 10853, "task_loss": 1.0691314935684204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4508817195892334, "epoch": 9.17, "learning_rate": 4.1251056635672025e-06, "loss": 0.4624, "step": 10854, "task_loss": 0.46122539043426514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3504142463207245, "epoch": 9.18, "learning_rate": 4.120879120879121e-06, "loss": 0.3559, "step": 10855, "task_loss": 0.6790086030960083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27813583612442017, "epoch": 9.18, "learning_rate": 4.11665257819104e-06, "loss": 0.4212, "step": 10856, "task_loss": 0.6236938834190369 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3508034348487854, "epoch": 9.18, "learning_rate": 4.112426035502958e-06, "loss": 0.3861, "step": 10857, "task_loss": 0.34262847900390625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.509964108467102, "epoch": 9.18, "learning_rate": 4.108199492814878e-06, "loss": 0.3654, "step": 10858, "task_loss": 0.2515036165714264 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4743852913379669, "epoch": 9.18, "learning_rate": 4.103972950126796e-06, "loss": 0.3272, "step": 10859, "task_loss": 0.9125486612319946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14461076259613037, "epoch": 9.18, "learning_rate": 4.099746407438716e-06, "loss": 0.2916, "step": 10860, "task_loss": 0.2800450921058655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40469595789909363, "epoch": 9.18, "learning_rate": 4.095519864750634e-06, "loss": 0.3647, "step": 10861, "task_loss": 0.8845595121383667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29161354899406433, "epoch": 9.18, "learning_rate": 4.091293322062554e-06, "loss": 0.3104, "step": 10862, "task_loss": 0.5635102987289429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24710771441459656, "epoch": 9.18, "learning_rate": 4.087066779374472e-06, "loss": 0.2471, "step": 10863, "task_loss": 0.31261780858039856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2973331809043884, "epoch": 9.18, "learning_rate": 4.0828402366863906e-06, "loss": 0.4577, "step": 10864, "task_loss": 0.9894577860832214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23789219558238983, "epoch": 9.18, "learning_rate": 4.0786136939983095e-06, "loss": 0.3082, "step": 10865, "task_loss": 0.3083100914955139 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4020971953868866, "epoch": 9.19, "learning_rate": 4.074387151310228e-06, "loss": 0.413, "step": 10866, "task_loss": 1.3575773239135742 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23139192163944244, "epoch": 9.19, "learning_rate": 4.070160608622147e-06, "loss": 0.3787, "step": 10867, "task_loss": 0.7296725511550903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2534216344356537, "epoch": 9.19, "learning_rate": 4.065934065934066e-06, "loss": 0.3212, "step": 10868, "task_loss": 0.7202332019805908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17211177945137024, "epoch": 9.19, "learning_rate": 4.061707523245985e-06, "loss": 0.3443, "step": 10869, "task_loss": 0.01822839118540287 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4365965723991394, "epoch": 9.19, "learning_rate": 4.057480980557904e-06, "loss": 0.3509, "step": 10870, "task_loss": 0.42886412143707275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6573877930641174, "epoch": 9.19, "learning_rate": 4.053254437869823e-06, "loss": 0.3724, "step": 10871, "task_loss": 0.6392317414283752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4887334406375885, "epoch": 9.19, "learning_rate": 4.049027895181742e-06, "loss": 0.5488, "step": 10872, "task_loss": 0.7437435984611511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16104495525360107, "epoch": 9.19, "learning_rate": 4.04480135249366e-06, "loss": 0.2974, "step": 10873, "task_loss": 0.3282138407230377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24984420835971832, "epoch": 9.19, "learning_rate": 4.0405748098055795e-06, "loss": 0.4116, "step": 10874, "task_loss": 0.48430758714675903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2619593143463135, "epoch": 9.19, "learning_rate": 4.0363482671174975e-06, "loss": 0.2358, "step": 10875, "task_loss": 0.6308826208114624 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38115665316581726, "epoch": 9.19, "learning_rate": 4.032121724429417e-06, "loss": 0.4476, "step": 10876, "task_loss": 0.6427804827690125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39664244651794434, "epoch": 9.19, "learning_rate": 4.027895181741335e-06, "loss": 0.4083, "step": 10877, "task_loss": 0.5090481042861938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22147826850414276, "epoch": 9.2, "learning_rate": 4.023668639053255e-06, "loss": 0.3935, "step": 10878, "task_loss": 0.06252767145633698 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.443624883890152, "epoch": 9.2, "learning_rate": 4.019442096365174e-06, "loss": 0.4139, "step": 10879, "task_loss": 0.8653573393821716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4937755763530731, "epoch": 9.2, "learning_rate": 4.015215553677092e-06, "loss": 0.5055, "step": 10880, "task_loss": 1.0028654336929321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3918817341327667, "epoch": 9.2, "learning_rate": 4.010989010989012e-06, "loss": 0.397, "step": 10881, "task_loss": 0.3180001676082611 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3301582634449005, "epoch": 9.2, "learning_rate": 4.00676246830093e-06, "loss": 0.4935, "step": 10882, "task_loss": 1.3016095161437988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5090761184692383, "epoch": 9.2, "learning_rate": 4.0025359256128495e-06, "loss": 0.4918, "step": 10883, "task_loss": 0.9981525540351868 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.245442196726799, "epoch": 9.2, "learning_rate": 3.9983093829247675e-06, "loss": 0.266, "step": 10884, "task_loss": 0.15953753888607025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2800212502479553, "epoch": 9.2, "learning_rate": 3.9940828402366864e-06, "loss": 0.4029, "step": 10885, "task_loss": 0.5439020991325378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42538100481033325, "epoch": 9.2, "learning_rate": 3.989856297548605e-06, "loss": 0.4544, "step": 10886, "task_loss": 0.35914820432662964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31956934928894043, "epoch": 9.2, "learning_rate": 3.985629754860524e-06, "loss": 0.3918, "step": 10887, "task_loss": 0.4242209792137146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1609598994255066, "epoch": 9.2, "learning_rate": 3.981403212172443e-06, "loss": 0.2285, "step": 10888, "task_loss": 0.5662160515785217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6493076682090759, "epoch": 9.2, "learning_rate": 3.977176669484362e-06, "loss": 0.4032, "step": 10889, "task_loss": 1.171083688735962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.421529620885849, "epoch": 9.21, "learning_rate": 3.972950126796281e-06, "loss": 0.3771, "step": 10890, "task_loss": 0.49656879901885986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2736101746559143, "epoch": 9.21, "learning_rate": 3.9687235841082e-06, "loss": 0.3418, "step": 10891, "task_loss": 0.10867220163345337 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32187724113464355, "epoch": 9.21, "learning_rate": 3.964497041420119e-06, "loss": 0.4061, "step": 10892, "task_loss": 0.44555678963661194 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3557005822658539, "epoch": 9.21, "learning_rate": 3.9602704987320376e-06, "loss": 0.3318, "step": 10893, "task_loss": 0.5973302125930786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3964943289756775, "epoch": 9.21, "learning_rate": 3.9560439560439565e-06, "loss": 0.2838, "step": 10894, "task_loss": 0.47388774156570435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26992467045783997, "epoch": 9.21, "learning_rate": 3.951817413355875e-06, "loss": 0.3673, "step": 10895, "task_loss": 0.37616825103759766 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2727281451225281, "epoch": 9.21, "learning_rate": 3.947590870667793e-06, "loss": 0.3779, "step": 10896, "task_loss": 0.5296937823295593 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1504100263118744, "epoch": 9.21, "learning_rate": 3.943364327979713e-06, "loss": 0.2578, "step": 10897, "task_loss": 0.8117237091064453 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2810574769973755, "epoch": 9.21, "learning_rate": 3.939137785291631e-06, "loss": 0.2518, "step": 10898, "task_loss": 0.1291680783033371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.418807715177536, "epoch": 9.21, "learning_rate": 3.934911242603551e-06, "loss": 0.3558, "step": 10899, "task_loss": 1.055646300315857 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5120546221733093, "epoch": 9.21, "learning_rate": 3.930684699915469e-06, "loss": 0.3928, "step": 10900, "task_loss": 0.8648068904876709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47959765791893005, "epoch": 9.21, "learning_rate": 3.926458157227388e-06, "loss": 0.4048, "step": 10901, "task_loss": 0.3264666497707367 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2359699010848999, "epoch": 9.22, "learning_rate": 3.922231614539307e-06, "loss": 0.3088, "step": 10902, "task_loss": 0.6279082298278809 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19544366002082825, "epoch": 9.22, "learning_rate": 3.918005071851226e-06, "loss": 0.3315, "step": 10903, "task_loss": 0.05388445779681206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27638497948646545, "epoch": 9.22, "learning_rate": 3.9137785291631445e-06, "loss": 0.3884, "step": 10904, "task_loss": 0.5018625259399414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2042871117591858, "epoch": 9.22, "learning_rate": 3.909551986475063e-06, "loss": 0.3475, "step": 10905, "task_loss": 0.5975039005279541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3266107738018036, "epoch": 9.22, "learning_rate": 3.905325443786982e-06, "loss": 0.3685, "step": 10906, "task_loss": 1.1789569854736328 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6518645882606506, "epoch": 9.22, "learning_rate": 3.901098901098901e-06, "loss": 0.4929, "step": 10907, "task_loss": 1.005140781402588 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2009197473526001, "epoch": 9.22, "learning_rate": 3.89687235841082e-06, "loss": 0.3198, "step": 10908, "task_loss": 0.5147883296012878 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4295263886451721, "epoch": 9.22, "learning_rate": 3.892645815722739e-06, "loss": 0.37, "step": 10909, "task_loss": 1.220171570777893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31452563405036926, "epoch": 9.22, "learning_rate": 3.888419273034658e-06, "loss": 0.3429, "step": 10910, "task_loss": 0.4227457642555237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3117865324020386, "epoch": 9.22, "learning_rate": 3.884192730346577e-06, "loss": 0.3468, "step": 10911, "task_loss": 0.30923670530319214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18727166950702667, "epoch": 9.22, "learning_rate": 3.879966187658496e-06, "loss": 0.2574, "step": 10912, "task_loss": 0.5415329337120056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2038675844669342, "epoch": 9.22, "learning_rate": 3.8757396449704146e-06, "loss": 0.2664, "step": 10913, "task_loss": 0.9026894569396973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.52046138048172, "epoch": 9.23, "learning_rate": 3.8715131022823334e-06, "loss": 0.5213, "step": 10914, "task_loss": 0.6144980192184448 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3290405869483948, "epoch": 9.23, "learning_rate": 3.867286559594252e-06, "loss": 0.2744, "step": 10915, "task_loss": 1.0736050605773926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7283310294151306, "epoch": 9.23, "learning_rate": 3.863060016906171e-06, "loss": 0.4392, "step": 10916, "task_loss": 0.8360440135002136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.302859902381897, "epoch": 9.23, "learning_rate": 3.858833474218089e-06, "loss": 0.3762, "step": 10917, "task_loss": 0.7501512169837952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4375344514846802, "epoch": 9.23, "learning_rate": 3.854606931530009e-06, "loss": 0.4478, "step": 10918, "task_loss": 0.34103307127952576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5863479375839233, "epoch": 9.23, "learning_rate": 3.850380388841927e-06, "loss": 0.4381, "step": 10919, "task_loss": 0.33770954608917236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3453470766544342, "epoch": 9.23, "learning_rate": 3.846153846153847e-06, "loss": 0.368, "step": 10920, "task_loss": 0.3951333165168762 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30058974027633667, "epoch": 9.23, "learning_rate": 3.841927303465765e-06, "loss": 0.3175, "step": 10921, "task_loss": 0.44761788845062256 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26135939359664917, "epoch": 9.23, "learning_rate": 3.837700760777685e-06, "loss": 0.3161, "step": 10922, "task_loss": 0.944241464138031 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6498162746429443, "epoch": 9.23, "learning_rate": 3.833474218089603e-06, "loss": 0.5597, "step": 10923, "task_loss": 0.43873777985572815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17337553203105927, "epoch": 9.23, "learning_rate": 3.8292476754015215e-06, "loss": 0.2333, "step": 10924, "task_loss": 0.2576712667942047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.276930570602417, "epoch": 9.23, "learning_rate": 3.82502113271344e-06, "loss": 0.3794, "step": 10925, "task_loss": 0.9625747203826904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27496975660324097, "epoch": 9.24, "learning_rate": 3.820794590025359e-06, "loss": 0.3037, "step": 10926, "task_loss": 0.47768476605415344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26467365026474, "epoch": 9.24, "learning_rate": 3.816568047337278e-06, "loss": 0.3473, "step": 10927, "task_loss": 0.44053125381469727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2823783755302429, "epoch": 9.24, "learning_rate": 3.812341504649197e-06, "loss": 0.4024, "step": 10928, "task_loss": 0.3518713414669037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2675750255584717, "epoch": 9.24, "learning_rate": 3.8081149619611155e-06, "loss": 0.2754, "step": 10929, "task_loss": 0.4231777787208557 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.281505823135376, "epoch": 9.24, "learning_rate": 3.803888419273035e-06, "loss": 0.4318, "step": 10930, "task_loss": 0.8863570690155029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24458247423171997, "epoch": 9.24, "learning_rate": 3.7996618765849533e-06, "loss": 0.2778, "step": 10931, "task_loss": 0.5408713817596436 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37484899163246155, "epoch": 9.24, "learning_rate": 3.7954353338968726e-06, "loss": 0.3493, "step": 10932, "task_loss": 0.11423295736312866 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.412622332572937, "epoch": 9.24, "learning_rate": 3.791208791208791e-06, "loss": 0.3536, "step": 10933, "task_loss": 0.6945928335189819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4011216163635254, "epoch": 9.24, "learning_rate": 3.7869822485207104e-06, "loss": 0.331, "step": 10934, "task_loss": 1.1550707817077637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28640639781951904, "epoch": 9.24, "learning_rate": 3.782755705832629e-06, "loss": 0.3988, "step": 10935, "task_loss": 0.36315611004829407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3395368158817291, "epoch": 9.24, "learning_rate": 3.778529163144548e-06, "loss": 0.4247, "step": 10936, "task_loss": 0.316150963306427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31310397386550903, "epoch": 9.24, "learning_rate": 3.7743026204564663e-06, "loss": 0.3517, "step": 10937, "task_loss": 0.5538923144340515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3481917977333069, "epoch": 9.25, "learning_rate": 3.7700760777683856e-06, "loss": 0.3437, "step": 10938, "task_loss": 0.7409000396728516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4529663026332855, "epoch": 9.25, "learning_rate": 3.765849535080305e-06, "loss": 0.3874, "step": 10939, "task_loss": 0.6403841972351074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25365138053894043, "epoch": 9.25, "learning_rate": 3.7616229923922234e-06, "loss": 0.3634, "step": 10940, "task_loss": 0.15202222764492035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5728182196617126, "epoch": 9.25, "learning_rate": 3.7573964497041427e-06, "loss": 0.4113, "step": 10941, "task_loss": 1.1576825380325317 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23296308517456055, "epoch": 9.25, "learning_rate": 3.753169907016061e-06, "loss": 0.3967, "step": 10942, "task_loss": 0.05683799833059311 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.380463570356369, "epoch": 9.25, "learning_rate": 3.74894336432798e-06, "loss": 0.3772, "step": 10943, "task_loss": 1.3070838451385498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3895018398761749, "epoch": 9.25, "learning_rate": 3.7447168216398985e-06, "loss": 0.3434, "step": 10944, "task_loss": 0.6772066950798035 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2726972699165344, "epoch": 9.25, "learning_rate": 3.740490278951818e-06, "loss": 0.3081, "step": 10945, "task_loss": 0.9403440952301025 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.750728964805603, "epoch": 9.25, "learning_rate": 3.7362637362637363e-06, "loss": 0.6266, "step": 10946, "task_loss": 0.8400091528892517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2021588534116745, "epoch": 9.25, "learning_rate": 3.7320371935756556e-06, "loss": 0.2756, "step": 10947, "task_loss": 0.25848424434661865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6216771602630615, "epoch": 9.25, "learning_rate": 3.727810650887574e-06, "loss": 0.4214, "step": 10948, "task_loss": 0.4798125624656677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20072919130325317, "epoch": 9.26, "learning_rate": 3.7235841081994934e-06, "loss": 0.3727, "step": 10949, "task_loss": 1.859929084777832 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21267297863960266, "epoch": 9.26, "learning_rate": 3.719357565511412e-06, "loss": 0.3265, "step": 10950, "task_loss": 0.3794448673725128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2294258028268814, "epoch": 9.26, "learning_rate": 3.7151310228233307e-06, "loss": 0.3656, "step": 10951, "task_loss": 0.5274657011032104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28940099477767944, "epoch": 9.26, "learning_rate": 3.710904480135249e-06, "loss": 0.3166, "step": 10952, "task_loss": 1.2432137727737427 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5230440497398376, "epoch": 9.26, "learning_rate": 3.7066779374471685e-06, "loss": 0.4425, "step": 10953, "task_loss": 0.23548617959022522 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.328269898891449, "epoch": 9.26, "learning_rate": 3.702451394759087e-06, "loss": 0.3651, "step": 10954, "task_loss": 1.5223937034606934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4088091552257538, "epoch": 9.26, "learning_rate": 3.6982248520710063e-06, "loss": 0.3338, "step": 10955, "task_loss": 0.39432278275489807 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30290836095809937, "epoch": 9.26, "learning_rate": 3.6939983093829248e-06, "loss": 0.2839, "step": 10956, "task_loss": 0.3671436905860901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3768446743488312, "epoch": 9.26, "learning_rate": 3.689771766694844e-06, "loss": 0.3686, "step": 10957, "task_loss": 0.43358027935028076 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.272566556930542, "epoch": 9.26, "learning_rate": 3.6855452240067626e-06, "loss": 0.3659, "step": 10958, "task_loss": 0.5586867928504944 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21464037895202637, "epoch": 9.26, "learning_rate": 3.6813186813186814e-06, "loss": 0.3118, "step": 10959, "task_loss": 0.4399377107620239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27069610357284546, "epoch": 9.26, "learning_rate": 3.6770921386306e-06, "loss": 0.352, "step": 10960, "task_loss": 0.7179108262062073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3811045289039612, "epoch": 9.27, "learning_rate": 3.6728655959425192e-06, "loss": 0.4334, "step": 10961, "task_loss": 0.7550804615020752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3838531970977783, "epoch": 9.27, "learning_rate": 3.6686390532544377e-06, "loss": 0.4031, "step": 10962, "task_loss": 0.5705326795578003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24511802196502686, "epoch": 9.27, "learning_rate": 3.664412510566357e-06, "loss": 0.3077, "step": 10963, "task_loss": 1.2790706157684326 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37313956022262573, "epoch": 9.27, "learning_rate": 3.6601859678782755e-06, "loss": 0.3109, "step": 10964, "task_loss": 0.2583644390106201 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2917196452617645, "epoch": 9.27, "learning_rate": 3.655959425190195e-06, "loss": 0.2744, "step": 10965, "task_loss": 0.3394607603549957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33091214299201965, "epoch": 9.27, "learning_rate": 3.6517328825021133e-06, "loss": 0.3297, "step": 10966, "task_loss": 0.2677742540836334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16183951497077942, "epoch": 9.27, "learning_rate": 3.647506339814032e-06, "loss": 0.2684, "step": 10967, "task_loss": 0.12940217554569244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4263492822647095, "epoch": 9.27, "learning_rate": 3.6432797971259506e-06, "loss": 0.3809, "step": 10968, "task_loss": 0.7249758839607239 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3830980956554413, "epoch": 9.27, "learning_rate": 3.63905325443787e-06, "loss": 0.3199, "step": 10969, "task_loss": 0.4624652564525604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3715564012527466, "epoch": 9.27, "learning_rate": 3.6348267117497893e-06, "loss": 0.4076, "step": 10970, "task_loss": 0.09809459745883942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.450778067111969, "epoch": 9.27, "learning_rate": 3.6306001690617077e-06, "loss": 0.4605, "step": 10971, "task_loss": 1.3819876909255981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22241252660751343, "epoch": 9.27, "learning_rate": 3.626373626373627e-06, "loss": 0.4126, "step": 10972, "task_loss": 0.10224239528179169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3260951638221741, "epoch": 9.28, "learning_rate": 3.6221470836855455e-06, "loss": 0.3765, "step": 10973, "task_loss": 0.2895330786705017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40865862369537354, "epoch": 9.28, "learning_rate": 3.6179205409974644e-06, "loss": 0.3444, "step": 10974, "task_loss": 0.3284333050251007 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44510844349861145, "epoch": 9.28, "learning_rate": 3.613693998309383e-06, "loss": 0.3402, "step": 10975, "task_loss": 0.2670443058013916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6085115671157837, "epoch": 9.28, "learning_rate": 3.609467455621302e-06, "loss": 0.4282, "step": 10976, "task_loss": 0.2907293140888214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32963934540748596, "epoch": 9.28, "learning_rate": 3.6052409129332206e-06, "loss": 0.4219, "step": 10977, "task_loss": 0.8998923897743225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3749598264694214, "epoch": 9.28, "learning_rate": 3.60101437024514e-06, "loss": 0.3874, "step": 10978, "task_loss": 0.37733250856399536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31837302446365356, "epoch": 9.28, "learning_rate": 3.5967878275570584e-06, "loss": 0.2408, "step": 10979, "task_loss": 0.8546398878097534 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31893807649612427, "epoch": 9.28, "learning_rate": 3.5925612848689777e-06, "loss": 0.3398, "step": 10980, "task_loss": 0.9907719492912292 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.543782651424408, "epoch": 9.28, "learning_rate": 3.5883347421808962e-06, "loss": 0.4062, "step": 10981, "task_loss": 1.19627845287323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2998760938644409, "epoch": 9.28, "learning_rate": 3.584108199492815e-06, "loss": 0.3882, "step": 10982, "task_loss": 0.6442987322807312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3204306960105896, "epoch": 9.28, "learning_rate": 3.5798816568047336e-06, "loss": 0.328, "step": 10983, "task_loss": 0.5392677187919617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17468169331550598, "epoch": 9.28, "learning_rate": 3.575655114116653e-06, "loss": 0.2504, "step": 10984, "task_loss": 1.106391429901123 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17096957564353943, "epoch": 9.29, "learning_rate": 3.5714285714285714e-06, "loss": 0.3143, "step": 10985, "task_loss": 0.11747336387634277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2537137567996979, "epoch": 9.29, "learning_rate": 3.5672020287404907e-06, "loss": 0.3988, "step": 10986, "task_loss": 0.5070047974586487 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3965149223804474, "epoch": 9.29, "learning_rate": 3.562975486052409e-06, "loss": 0.4141, "step": 10987, "task_loss": 0.7759620547294617 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4468211829662323, "epoch": 9.29, "learning_rate": 3.5587489433643285e-06, "loss": 0.4267, "step": 10988, "task_loss": 1.0616810321807861 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2447449117898941, "epoch": 9.29, "learning_rate": 3.554522400676247e-06, "loss": 0.3487, "step": 10989, "task_loss": 0.21285954117774963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34608194231987, "epoch": 9.29, "learning_rate": 3.550295857988166e-06, "loss": 0.3429, "step": 10990, "task_loss": 0.14111295342445374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3419375717639923, "epoch": 9.29, "learning_rate": 3.5460693153000843e-06, "loss": 0.3183, "step": 10991, "task_loss": 0.5419254302978516 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42309314012527466, "epoch": 9.29, "learning_rate": 3.5418427726120036e-06, "loss": 0.3111, "step": 10992, "task_loss": 0.4497659504413605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6240777373313904, "epoch": 9.29, "learning_rate": 3.537616229923922e-06, "loss": 0.3926, "step": 10993, "task_loss": 1.184451937675476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18123583495616913, "epoch": 9.29, "learning_rate": 3.5333896872358414e-06, "loss": 0.2873, "step": 10994, "task_loss": 1.2545934915542603 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5459150075912476, "epoch": 9.29, "learning_rate": 3.52916314454776e-06, "loss": 0.3628, "step": 10995, "task_loss": 0.7613987326622009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33714455366134644, "epoch": 9.29, "learning_rate": 3.524936601859679e-06, "loss": 0.3338, "step": 10996, "task_loss": 0.5930745601654053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4005102217197418, "epoch": 9.3, "learning_rate": 3.5207100591715976e-06, "loss": 0.4147, "step": 10997, "task_loss": 0.21574874222278595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.271607905626297, "epoch": 9.3, "learning_rate": 3.5164835164835165e-06, "loss": 0.2933, "step": 10998, "task_loss": 0.2806920111179352 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3341159522533417, "epoch": 9.3, "learning_rate": 3.512256973795436e-06, "loss": 0.3537, "step": 10999, "task_loss": 0.31831231713294983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2314402461051941, "epoch": 9.3, "learning_rate": 3.5080304311073543e-06, "loss": 0.3196, "step": 11000, "task_loss": 0.11587528139352798 }, { "epoch": 9.3, "eval_accuracy": 0.917980198019802, "eval_loss": 0.24436061084270477, "eval_runtime": 225.5759, "eval_samples_per_second": 111.936, "eval_steps_per_second": 0.878, "step": 11000 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5138021111488342, "epoch": 9.3, "learning_rate": 3.5038038884192736e-06, "loss": 0.4471, "step": 11001, "task_loss": 0.8041527271270752 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39428335428237915, "epoch": 9.3, "learning_rate": 3.499577345731192e-06, "loss": 0.3563, "step": 11002, "task_loss": 1.0692877769470215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3546184301376343, "epoch": 9.3, "learning_rate": 3.495350803043111e-06, "loss": 0.4144, "step": 11003, "task_loss": 0.7868075370788574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5297516584396362, "epoch": 9.3, "learning_rate": 3.49112426035503e-06, "loss": 0.3983, "step": 11004, "task_loss": 1.1671717166900635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2653290331363678, "epoch": 9.3, "learning_rate": 3.4868977176669488e-06, "loss": 0.214, "step": 11005, "task_loss": 0.04818599671125412 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4491077959537506, "epoch": 9.3, "learning_rate": 3.4826711749788672e-06, "loss": 0.4928, "step": 11006, "task_loss": 0.13281863927841187 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2281665951013565, "epoch": 9.3, "learning_rate": 3.4784446322907865e-06, "loss": 0.3434, "step": 11007, "task_loss": 0.785679042339325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3459298014640808, "epoch": 9.3, "learning_rate": 3.474218089602705e-06, "loss": 0.3259, "step": 11008, "task_loss": 0.5178593397140503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.10948862135410309, "epoch": 9.31, "learning_rate": 3.4699915469146243e-06, "loss": 0.273, "step": 11009, "task_loss": 0.01376782450824976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28809839487075806, "epoch": 9.31, "learning_rate": 3.465765004226543e-06, "loss": 0.3482, "step": 11010, "task_loss": 0.4283992350101471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26329946517944336, "epoch": 9.31, "learning_rate": 3.4615384615384617e-06, "loss": 0.3507, "step": 11011, "task_loss": 0.3022018074989319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6969354748725891, "epoch": 9.31, "learning_rate": 3.4573119188503806e-06, "loss": 0.45, "step": 11012, "task_loss": 0.19120028614997864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34133753180503845, "epoch": 9.31, "learning_rate": 3.4530853761622995e-06, "loss": 0.2691, "step": 11013, "task_loss": 0.49421724677085876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3515082001686096, "epoch": 9.31, "learning_rate": 3.448858833474218e-06, "loss": 0.3355, "step": 11014, "task_loss": 0.7741529941558838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40698665380477905, "epoch": 9.31, "learning_rate": 3.4446322907861373e-06, "loss": 0.3268, "step": 11015, "task_loss": 1.5545374155044556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26684123277664185, "epoch": 9.31, "learning_rate": 3.4404057480980557e-06, "loss": 0.417, "step": 11016, "task_loss": 0.7231999039649963 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5027421116828918, "epoch": 9.31, "learning_rate": 3.436179205409975e-06, "loss": 0.4991, "step": 11017, "task_loss": 0.9126652479171753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31330594420433044, "epoch": 9.31, "learning_rate": 3.4319526627218935e-06, "loss": 0.3356, "step": 11018, "task_loss": 0.4716689884662628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4825432598590851, "epoch": 9.31, "learning_rate": 3.4277261200338124e-06, "loss": 0.4306, "step": 11019, "task_loss": 0.6834752559661865 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27844345569610596, "epoch": 9.32, "learning_rate": 3.4234995773457313e-06, "loss": 0.4129, "step": 11020, "task_loss": 0.6349384784698486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.350713849067688, "epoch": 9.32, "learning_rate": 3.41927303465765e-06, "loss": 0.3601, "step": 11021, "task_loss": 0.7604373097419739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18268582224845886, "epoch": 9.32, "learning_rate": 3.4150464919695687e-06, "loss": 0.3273, "step": 11022, "task_loss": 0.5454663038253784 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28187012672424316, "epoch": 9.32, "learning_rate": 3.410819949281488e-06, "loss": 0.3841, "step": 11023, "task_loss": 0.6284623742103577 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45291391015052795, "epoch": 9.32, "learning_rate": 3.4065934065934064e-06, "loss": 0.3761, "step": 11024, "task_loss": 0.7281866073608398 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35542598366737366, "epoch": 9.32, "learning_rate": 3.4023668639053257e-06, "loss": 0.2719, "step": 11025, "task_loss": 0.6919620633125305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19045458734035492, "epoch": 9.32, "learning_rate": 3.3981403212172442e-06, "loss": 0.3264, "step": 11026, "task_loss": 0.6196629405021667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6128476858139038, "epoch": 9.32, "learning_rate": 3.393913778529163e-06, "loss": 0.345, "step": 11027, "task_loss": 0.5651718378067017 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6871416568756104, "epoch": 9.32, "learning_rate": 3.3896872358410824e-06, "loss": 0.4755, "step": 11028, "task_loss": 1.1522578001022339 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1961861103773117, "epoch": 9.32, "learning_rate": 3.385460693153001e-06, "loss": 0.3857, "step": 11029, "task_loss": 1.2403775453567505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4689747095108032, "epoch": 9.32, "learning_rate": 3.38123415046492e-06, "loss": 0.4099, "step": 11030, "task_loss": 0.35707172751426697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3622818887233734, "epoch": 9.32, "learning_rate": 3.3770076077768387e-06, "loss": 0.3056, "step": 11031, "task_loss": 0.22565320134162903 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45878249406814575, "epoch": 9.33, "learning_rate": 3.372781065088758e-06, "loss": 0.372, "step": 11032, "task_loss": 0.47638577222824097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30390945076942444, "epoch": 9.33, "learning_rate": 3.3685545224006765e-06, "loss": 0.3247, "step": 11033, "task_loss": 0.363949179649353 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2505924105644226, "epoch": 9.33, "learning_rate": 3.3643279797125954e-06, "loss": 0.3907, "step": 11034, "task_loss": 0.331704318523407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2890142500400543, "epoch": 9.33, "learning_rate": 3.360101437024514e-06, "loss": 0.3096, "step": 11035, "task_loss": 0.5168416500091553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22444498538970947, "epoch": 9.33, "learning_rate": 3.355874894336433e-06, "loss": 0.3775, "step": 11036, "task_loss": 0.19301094114780426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21093818545341492, "epoch": 9.33, "learning_rate": 3.3516483516483516e-06, "loss": 0.3235, "step": 11037, "task_loss": 0.1737367957830429 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3835913836956024, "epoch": 9.33, "learning_rate": 3.347421808960271e-06, "loss": 0.3997, "step": 11038, "task_loss": 1.3739631175994873 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30857211351394653, "epoch": 9.33, "learning_rate": 3.3431952662721894e-06, "loss": 0.3385, "step": 11039, "task_loss": 0.36353522539138794 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2637791633605957, "epoch": 9.33, "learning_rate": 3.3389687235841087e-06, "loss": 0.336, "step": 11040, "task_loss": 0.6328445076942444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3103238642215729, "epoch": 9.33, "learning_rate": 3.334742180896027e-06, "loss": 0.327, "step": 11041, "task_loss": 0.29911383986473083 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2658078074455261, "epoch": 9.33, "learning_rate": 3.330515638207946e-06, "loss": 0.411, "step": 11042, "task_loss": 0.1911116987466812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2921827435493469, "epoch": 9.33, "learning_rate": 3.3262890955198645e-06, "loss": 0.4182, "step": 11043, "task_loss": 0.8218912482261658 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3119533061981201, "epoch": 9.34, "learning_rate": 3.322062552831784e-06, "loss": 0.3903, "step": 11044, "task_loss": 0.11271906644105911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36212167143821716, "epoch": 9.34, "learning_rate": 3.3178360101437023e-06, "loss": 0.3574, "step": 11045, "task_loss": 0.40594732761383057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40093764662742615, "epoch": 9.34, "learning_rate": 3.3136094674556216e-06, "loss": 0.3564, "step": 11046, "task_loss": 0.7874270677566528 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23478049039840698, "epoch": 9.34, "learning_rate": 3.30938292476754e-06, "loss": 0.2518, "step": 11047, "task_loss": 0.17699165642261505 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30439358949661255, "epoch": 9.34, "learning_rate": 3.3051563820794594e-06, "loss": 0.5237, "step": 11048, "task_loss": 1.073488473892212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18234124779701233, "epoch": 9.34, "learning_rate": 3.300929839391378e-06, "loss": 0.2784, "step": 11049, "task_loss": 0.3398984968662262 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2595115900039673, "epoch": 9.34, "learning_rate": 3.2967032967032968e-06, "loss": 0.3271, "step": 11050, "task_loss": 0.6616547107696533 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37582555413246155, "epoch": 9.34, "learning_rate": 3.2924767540152152e-06, "loss": 0.3551, "step": 11051, "task_loss": 0.3572853207588196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3168129622936249, "epoch": 9.34, "learning_rate": 3.2882502113271346e-06, "loss": 0.3869, "step": 11052, "task_loss": 0.4606495201587677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47435352206230164, "epoch": 9.34, "learning_rate": 3.284023668639053e-06, "loss": 0.3695, "step": 11053, "task_loss": 0.5183403491973877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26282864809036255, "epoch": 9.34, "learning_rate": 3.2797971259509723e-06, "loss": 0.2758, "step": 11054, "task_loss": 0.45322901010513306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.325207382440567, "epoch": 9.34, "learning_rate": 3.275570583262891e-06, "loss": 0.3171, "step": 11055, "task_loss": 0.5011888742446899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1975768506526947, "epoch": 9.35, "learning_rate": 3.27134404057481e-06, "loss": 0.3498, "step": 11056, "task_loss": 1.1277540922164917 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28575214743614197, "epoch": 9.35, "learning_rate": 3.2671174978867286e-06, "loss": 0.3435, "step": 11057, "task_loss": 0.5960267186164856 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3164902925491333, "epoch": 9.35, "learning_rate": 3.2628909551986475e-06, "loss": 0.2867, "step": 11058, "task_loss": 0.17749302089214325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.492826372385025, "epoch": 9.35, "learning_rate": 3.258664412510567e-06, "loss": 0.3331, "step": 11059, "task_loss": 0.37923234701156616 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4420795440673828, "epoch": 9.35, "learning_rate": 3.2544378698224853e-06, "loss": 0.4406, "step": 11060, "task_loss": 0.31104207038879395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36642709374427795, "epoch": 9.35, "learning_rate": 3.2502113271344046e-06, "loss": 0.2988, "step": 11061, "task_loss": 0.5089414715766907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2447928935289383, "epoch": 9.35, "learning_rate": 3.245984784446323e-06, "loss": 0.3991, "step": 11062, "task_loss": 0.640876829624176 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5279614925384521, "epoch": 9.35, "learning_rate": 3.2417582417582424e-06, "loss": 0.4147, "step": 11063, "task_loss": 0.42881929874420166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33532777428627014, "epoch": 9.35, "learning_rate": 3.237531699070161e-06, "loss": 0.349, "step": 11064, "task_loss": 0.8758630156517029 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4493199586868286, "epoch": 9.35, "learning_rate": 3.2333051563820797e-06, "loss": 0.4172, "step": 11065, "task_loss": 0.42403489351272583 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35518717765808105, "epoch": 9.35, "learning_rate": 3.229078613693998e-06, "loss": 0.4044, "step": 11066, "task_loss": 0.36862438917160034 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.603822648525238, "epoch": 9.35, "learning_rate": 3.2248520710059175e-06, "loss": 0.4129, "step": 11067, "task_loss": 0.713758647441864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22391360998153687, "epoch": 9.36, "learning_rate": 3.220625528317836e-06, "loss": 0.3396, "step": 11068, "task_loss": 0.0794229656457901 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18486909568309784, "epoch": 9.36, "learning_rate": 3.2163989856297553e-06, "loss": 0.2956, "step": 11069, "task_loss": 0.07828221470117569 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19117680191993713, "epoch": 9.36, "learning_rate": 3.2121724429416738e-06, "loss": 0.2602, "step": 11070, "task_loss": 0.6718312501907349 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40417739748954773, "epoch": 9.36, "learning_rate": 3.207945900253593e-06, "loss": 0.3964, "step": 11071, "task_loss": 0.718445897102356 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34396642446517944, "epoch": 9.36, "learning_rate": 3.2037193575655115e-06, "loss": 0.3633, "step": 11072, "task_loss": 1.2145955562591553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2959299683570862, "epoch": 9.36, "learning_rate": 3.1994928148774304e-06, "loss": 0.3395, "step": 11073, "task_loss": 0.3921305537223816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2636890113353729, "epoch": 9.36, "learning_rate": 3.195266272189349e-06, "loss": 0.3884, "step": 11074, "task_loss": 0.7005857825279236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2128930687904358, "epoch": 9.36, "learning_rate": 3.191039729501268e-06, "loss": 0.3592, "step": 11075, "task_loss": 0.2898893356323242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25692427158355713, "epoch": 9.36, "learning_rate": 3.1868131868131867e-06, "loss": 0.3596, "step": 11076, "task_loss": 0.5875060558319092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3619915246963501, "epoch": 9.36, "learning_rate": 3.182586644125106e-06, "loss": 0.3039, "step": 11077, "task_loss": 0.5276740789413452 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24015280604362488, "epoch": 9.36, "learning_rate": 3.1783601014370245e-06, "loss": 0.3402, "step": 11078, "task_loss": 0.6459085941314697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22988620400428772, "epoch": 9.36, "learning_rate": 3.1741335587489438e-06, "loss": 0.2984, "step": 11079, "task_loss": 0.12321125715970993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6407639384269714, "epoch": 9.37, "learning_rate": 3.1699070160608622e-06, "loss": 0.4384, "step": 11080, "task_loss": 1.0596818923950195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3968571722507477, "epoch": 9.37, "learning_rate": 3.165680473372781e-06, "loss": 0.3579, "step": 11081, "task_loss": 0.0914900153875351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7607548236846924, "epoch": 9.37, "learning_rate": 3.1614539306846996e-06, "loss": 0.5509, "step": 11082, "task_loss": 1.328239917755127 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5544617772102356, "epoch": 9.37, "learning_rate": 3.157227387996619e-06, "loss": 0.3957, "step": 11083, "task_loss": 0.7410380840301514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3408932089805603, "epoch": 9.37, "learning_rate": 3.1530008453085374e-06, "loss": 0.3907, "step": 11084, "task_loss": 0.4041612148284912 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5012742877006531, "epoch": 9.37, "learning_rate": 3.1487743026204567e-06, "loss": 0.3645, "step": 11085, "task_loss": 0.6453322768211365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.12356162816286087, "epoch": 9.37, "learning_rate": 3.144547759932375e-06, "loss": 0.4553, "step": 11086, "task_loss": 0.27337709069252014 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29178446531295776, "epoch": 9.37, "learning_rate": 3.1403212172442945e-06, "loss": 0.3669, "step": 11087, "task_loss": 0.2888859510421753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3878091871738434, "epoch": 9.37, "learning_rate": 3.1360946745562134e-06, "loss": 0.3989, "step": 11088, "task_loss": 0.8689674139022827 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24628372490406036, "epoch": 9.37, "learning_rate": 3.131868131868132e-06, "loss": 0.3527, "step": 11089, "task_loss": 0.40652742981910706 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30798661708831787, "epoch": 9.37, "learning_rate": 3.127641589180051e-06, "loss": 0.4339, "step": 11090, "task_loss": 0.5469135046005249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33946582674980164, "epoch": 9.38, "learning_rate": 3.1234150464919696e-06, "loss": 0.3979, "step": 11091, "task_loss": 0.595075786113739 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28729313611984253, "epoch": 9.38, "learning_rate": 3.1191885038038885e-06, "loss": 0.3503, "step": 11092, "task_loss": 0.24499759078025818 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42708301544189453, "epoch": 9.38, "learning_rate": 3.1149619611158074e-06, "loss": 0.4107, "step": 11093, "task_loss": 0.8493942022323608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5303544998168945, "epoch": 9.38, "learning_rate": 3.1107354184277263e-06, "loss": 0.4103, "step": 11094, "task_loss": 1.5267333984375 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3869002163410187, "epoch": 9.38, "learning_rate": 3.106508875739645e-06, "loss": 0.3593, "step": 11095, "task_loss": 0.8789853453636169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24231404066085815, "epoch": 9.38, "learning_rate": 3.1022823330515637e-06, "loss": 0.3452, "step": 11096, "task_loss": 0.5686526298522949 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21870648860931396, "epoch": 9.38, "learning_rate": 3.0980557903634826e-06, "loss": 0.2211, "step": 11097, "task_loss": 0.29066091775894165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23206254839897156, "epoch": 9.38, "learning_rate": 3.0938292476754014e-06, "loss": 0.4378, "step": 11098, "task_loss": 0.5667880773544312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3592981696128845, "epoch": 9.38, "learning_rate": 3.0896027049873203e-06, "loss": 0.4183, "step": 11099, "task_loss": 0.4001733660697937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28613173961639404, "epoch": 9.38, "learning_rate": 3.0853761622992392e-06, "loss": 0.3511, "step": 11100, "task_loss": 0.4204499423503876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37144899368286133, "epoch": 9.38, "learning_rate": 3.081149619611158e-06, "loss": 0.3554, "step": 11101, "task_loss": 1.710484266281128 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3413456082344055, "epoch": 9.38, "learning_rate": 3.0769230769230774e-06, "loss": 0.3428, "step": 11102, "task_loss": 0.3558732569217682 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3178199827671051, "epoch": 9.39, "learning_rate": 3.072696534234996e-06, "loss": 0.4028, "step": 11103, "task_loss": 0.38671812415122986 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28192704916000366, "epoch": 9.39, "learning_rate": 3.068469991546915e-06, "loss": 0.3574, "step": 11104, "task_loss": 1.3155499696731567 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3366948068141937, "epoch": 9.39, "learning_rate": 3.0642434488588337e-06, "loss": 0.3821, "step": 11105, "task_loss": 0.26136136054992676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24905310571193695, "epoch": 9.39, "learning_rate": 3.0600169061707526e-06, "loss": 0.3619, "step": 11106, "task_loss": 1.037542462348938 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4351212978363037, "epoch": 9.39, "learning_rate": 3.0557903634826715e-06, "loss": 0.4299, "step": 11107, "task_loss": 1.030940055847168 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45062148571014404, "epoch": 9.39, "learning_rate": 3.0515638207945904e-06, "loss": 0.4765, "step": 11108, "task_loss": 1.3921380043029785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29802122712135315, "epoch": 9.39, "learning_rate": 3.0473372781065093e-06, "loss": 0.3424, "step": 11109, "task_loss": 0.6004763841629028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4276250898838043, "epoch": 9.39, "learning_rate": 3.043110735418428e-06, "loss": 0.4188, "step": 11110, "task_loss": 0.5475895404815674 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3173372745513916, "epoch": 9.39, "learning_rate": 3.0388841927303466e-06, "loss": 0.3275, "step": 11111, "task_loss": 0.5360631942749023 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4072100520133972, "epoch": 9.39, "learning_rate": 3.0346576500422655e-06, "loss": 0.4034, "step": 11112, "task_loss": 1.122248888015747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23554542660713196, "epoch": 9.39, "learning_rate": 3.0304311073541844e-06, "loss": 0.2879, "step": 11113, "task_loss": 0.1571430265903473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2392873615026474, "epoch": 9.39, "learning_rate": 3.0262045646661033e-06, "loss": 0.343, "step": 11114, "task_loss": 0.7264078855514526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26673221588134766, "epoch": 9.4, "learning_rate": 3.021978021978022e-06, "loss": 0.3701, "step": 11115, "task_loss": 0.04119856655597687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1630285382270813, "epoch": 9.4, "learning_rate": 3.017751479289941e-06, "loss": 0.327, "step": 11116, "task_loss": 0.5227041244506836 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26777705550193787, "epoch": 9.4, "learning_rate": 3.01352493660186e-06, "loss": 0.3397, "step": 11117, "task_loss": 0.401448130607605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48817840218544006, "epoch": 9.4, "learning_rate": 3.009298393913779e-06, "loss": 0.3959, "step": 11118, "task_loss": 0.2747510075569153 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37120527029037476, "epoch": 9.4, "learning_rate": 3.0050718512256973e-06, "loss": 0.4289, "step": 11119, "task_loss": 0.373191237449646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18152177333831787, "epoch": 9.4, "learning_rate": 3.0008453085376162e-06, "loss": 0.3544, "step": 11120, "task_loss": 0.5906203985214233 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3422137498855591, "epoch": 9.4, "learning_rate": 2.996618765849535e-06, "loss": 0.3601, "step": 11121, "task_loss": 0.5210201144218445 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3814418315887451, "epoch": 9.4, "learning_rate": 2.992392223161454e-06, "loss": 0.4982, "step": 11122, "task_loss": 0.36218300461769104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3380776345729828, "epoch": 9.4, "learning_rate": 2.988165680473373e-06, "loss": 0.3983, "step": 11123, "task_loss": 0.45964542031288147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22319814562797546, "epoch": 9.4, "learning_rate": 2.9839391377852918e-06, "loss": 0.3362, "step": 11124, "task_loss": 0.34649941325187683 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5251970291137695, "epoch": 9.4, "learning_rate": 2.9797125950972107e-06, "loss": 0.4764, "step": 11125, "task_loss": 0.35213711857795715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36574220657348633, "epoch": 9.4, "learning_rate": 2.9754860524091296e-06, "loss": 0.409, "step": 11126, "task_loss": 0.9604750275611877 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49621087312698364, "epoch": 9.41, "learning_rate": 2.971259509721048e-06, "loss": 0.3818, "step": 11127, "task_loss": 0.5045230388641357 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4557150900363922, "epoch": 9.41, "learning_rate": 2.967032967032967e-06, "loss": 0.3404, "step": 11128, "task_loss": 0.1360621154308319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.12398593872785568, "epoch": 9.41, "learning_rate": 2.962806424344886e-06, "loss": 0.273, "step": 11129, "task_loss": 0.007481226231902838 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2632799744606018, "epoch": 9.41, "learning_rate": 2.9585798816568047e-06, "loss": 0.3894, "step": 11130, "task_loss": 0.4044606685638428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45583659410476685, "epoch": 9.41, "learning_rate": 2.9543533389687236e-06, "loss": 0.3836, "step": 11131, "task_loss": 1.7014849185943604 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2961050271987915, "epoch": 9.41, "learning_rate": 2.950126796280643e-06, "loss": 0.3258, "step": 11132, "task_loss": 1.3750492334365845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23177145421504974, "epoch": 9.41, "learning_rate": 2.9459002535925614e-06, "loss": 0.2053, "step": 11133, "task_loss": 0.27053189277648926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4508607089519501, "epoch": 9.41, "learning_rate": 2.9416737109044803e-06, "loss": 0.3504, "step": 11134, "task_loss": 1.0556132793426514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3256028890609741, "epoch": 9.41, "learning_rate": 2.937447168216399e-06, "loss": 0.3676, "step": 11135, "task_loss": 0.6541071534156799 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3309953212738037, "epoch": 9.41, "learning_rate": 2.933220625528318e-06, "loss": 0.3615, "step": 11136, "task_loss": 0.6462727189064026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20459747314453125, "epoch": 9.41, "learning_rate": 2.928994082840237e-06, "loss": 0.2526, "step": 11137, "task_loss": 0.4076334834098816 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1990799754858017, "epoch": 9.41, "learning_rate": 2.924767540152156e-06, "loss": 0.312, "step": 11138, "task_loss": 0.3357452154159546 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2570980191230774, "epoch": 9.42, "learning_rate": 2.9205409974640747e-06, "loss": 0.4303, "step": 11139, "task_loss": 0.7876008152961731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3088034987449646, "epoch": 9.42, "learning_rate": 2.9163144547759936e-06, "loss": 0.3194, "step": 11140, "task_loss": 0.5289038419723511 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35589104890823364, "epoch": 9.42, "learning_rate": 2.912087912087912e-06, "loss": 0.4739, "step": 11141, "task_loss": 0.40654855966567993 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2923547327518463, "epoch": 9.42, "learning_rate": 2.907861369399831e-06, "loss": 0.3831, "step": 11142, "task_loss": 0.27605077624320984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43648242950439453, "epoch": 9.42, "learning_rate": 2.90363482671175e-06, "loss": 0.3481, "step": 11143, "task_loss": 0.24743427336215973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25060001015663147, "epoch": 9.42, "learning_rate": 2.8994082840236688e-06, "loss": 0.4096, "step": 11144, "task_loss": 0.6989130973815918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37293773889541626, "epoch": 9.42, "learning_rate": 2.8951817413355877e-06, "loss": 0.3446, "step": 11145, "task_loss": 0.7791764140129089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4002939462661743, "epoch": 9.42, "learning_rate": 2.8909551986475065e-06, "loss": 0.3837, "step": 11146, "task_loss": 1.1386535167694092 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3432043194770813, "epoch": 9.42, "learning_rate": 2.8867286559594254e-06, "loss": 0.31, "step": 11147, "task_loss": 0.3063526451587677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31005948781967163, "epoch": 9.42, "learning_rate": 2.8825021132713443e-06, "loss": 0.369, "step": 11148, "task_loss": 0.18603965640068054 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38781529664993286, "epoch": 9.42, "learning_rate": 2.878275570583263e-06, "loss": 0.3884, "step": 11149, "task_loss": 0.8698360323905945 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49873051047325134, "epoch": 9.42, "learning_rate": 2.8740490278951817e-06, "loss": 0.3806, "step": 11150, "task_loss": 0.5676694512367249 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23108352720737457, "epoch": 9.43, "learning_rate": 2.8698224852071006e-06, "loss": 0.3083, "step": 11151, "task_loss": 0.49526727199554443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3350542187690735, "epoch": 9.43, "learning_rate": 2.8655959425190195e-06, "loss": 0.3731, "step": 11152, "task_loss": 0.6679126620292664 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3656049072742462, "epoch": 9.43, "learning_rate": 2.8613693998309384e-06, "loss": 0.3294, "step": 11153, "task_loss": 0.2707204222679138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2533494234085083, "epoch": 9.43, "learning_rate": 2.8571428571428573e-06, "loss": 0.2786, "step": 11154, "task_loss": 0.8871655464172363 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3235633969306946, "epoch": 9.43, "learning_rate": 2.852916314454776e-06, "loss": 0.36, "step": 11155, "task_loss": 0.14663556218147278 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3071832060813904, "epoch": 9.43, "learning_rate": 2.848689771766695e-06, "loss": 0.2851, "step": 11156, "task_loss": 0.5383762717247009 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28878065943717957, "epoch": 9.43, "learning_rate": 2.8444632290786135e-06, "loss": 0.3209, "step": 11157, "task_loss": 0.7168927192687988 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4282752275466919, "epoch": 9.43, "learning_rate": 2.8402366863905324e-06, "loss": 0.444, "step": 11158, "task_loss": 0.6458749175071716 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3900154232978821, "epoch": 9.43, "learning_rate": 2.8360101437024513e-06, "loss": 0.4018, "step": 11159, "task_loss": 1.3545957803726196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1885564923286438, "epoch": 9.43, "learning_rate": 2.83178360101437e-06, "loss": 0.2824, "step": 11160, "task_loss": 0.17722372710704803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.191127747297287, "epoch": 9.43, "learning_rate": 2.827557058326289e-06, "loss": 0.3312, "step": 11161, "task_loss": 0.5437976717948914 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2990482449531555, "epoch": 9.44, "learning_rate": 2.8233305156382084e-06, "loss": 0.3346, "step": 11162, "task_loss": 0.7536722421646118 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35889384150505066, "epoch": 9.44, "learning_rate": 2.8191039729501273e-06, "loss": 0.3899, "step": 11163, "task_loss": 0.6088002324104309 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29134809970855713, "epoch": 9.44, "learning_rate": 2.8148774302620457e-06, "loss": 0.3064, "step": 11164, "task_loss": 1.0273517370224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.13257403671741486, "epoch": 9.44, "learning_rate": 2.8106508875739646e-06, "loss": 0.3117, "step": 11165, "task_loss": 0.11883401870727539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.406500905752182, "epoch": 9.44, "learning_rate": 2.8064243448858835e-06, "loss": 0.3693, "step": 11166, "task_loss": 0.29360702633857727 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2889021635055542, "epoch": 9.44, "learning_rate": 2.8021978021978024e-06, "loss": 0.275, "step": 11167, "task_loss": 0.1887950599193573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3043598532676697, "epoch": 9.44, "learning_rate": 2.7979712595097213e-06, "loss": 0.3032, "step": 11168, "task_loss": 0.999653697013855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3810463547706604, "epoch": 9.44, "learning_rate": 2.79374471682164e-06, "loss": 0.3273, "step": 11169, "task_loss": 0.7576808929443359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3075995147228241, "epoch": 9.44, "learning_rate": 2.789518174133559e-06, "loss": 0.505, "step": 11170, "task_loss": 0.9707009792327881 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2421969771385193, "epoch": 9.44, "learning_rate": 2.785291631445478e-06, "loss": 0.2542, "step": 11171, "task_loss": 0.5644077658653259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2443886548280716, "epoch": 9.44, "learning_rate": 2.7810650887573965e-06, "loss": 0.3005, "step": 11172, "task_loss": 0.7673459649085999 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3950432538986206, "epoch": 9.44, "learning_rate": 2.7768385460693153e-06, "loss": 0.3732, "step": 11173, "task_loss": 0.19618147611618042 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19636203348636627, "epoch": 9.45, "learning_rate": 2.7726120033812342e-06, "loss": 0.3113, "step": 11174, "task_loss": 0.21818137168884277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2605751156806946, "epoch": 9.45, "learning_rate": 2.768385460693153e-06, "loss": 0.3423, "step": 11175, "task_loss": 0.8324499726295471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18753059208393097, "epoch": 9.45, "learning_rate": 2.764158918005072e-06, "loss": 0.3201, "step": 11176, "task_loss": 0.3602069616317749 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5155521035194397, "epoch": 9.45, "learning_rate": 2.759932375316991e-06, "loss": 0.3462, "step": 11177, "task_loss": 0.4848915636539459 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28499725461006165, "epoch": 9.45, "learning_rate": 2.75570583262891e-06, "loss": 0.3213, "step": 11178, "task_loss": 0.24565193057060242 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40402278304100037, "epoch": 9.45, "learning_rate": 2.7514792899408287e-06, "loss": 0.3392, "step": 11179, "task_loss": 0.8326399922370911 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46275094151496887, "epoch": 9.45, "learning_rate": 2.747252747252747e-06, "loss": 0.4173, "step": 11180, "task_loss": 0.36680543422698975 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24392396211624146, "epoch": 9.45, "learning_rate": 2.743026204564666e-06, "loss": 0.2465, "step": 11181, "task_loss": 0.30823227763175964 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44473177194595337, "epoch": 9.45, "learning_rate": 2.738799661876585e-06, "loss": 0.4163, "step": 11182, "task_loss": 0.3216438591480255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2071896344423294, "epoch": 9.45, "learning_rate": 2.734573119188504e-06, "loss": 0.2682, "step": 11183, "task_loss": 0.20310378074645996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3409462571144104, "epoch": 9.45, "learning_rate": 2.7303465765004227e-06, "loss": 0.3783, "step": 11184, "task_loss": 0.08326666057109833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3120713233947754, "epoch": 9.45, "learning_rate": 2.7261200338123416e-06, "loss": 0.4099, "step": 11185, "task_loss": 0.7058430910110474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.13359495997428894, "epoch": 9.46, "learning_rate": 2.7218934911242605e-06, "loss": 0.2778, "step": 11186, "task_loss": 0.027225244790315628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41171905398368835, "epoch": 9.46, "learning_rate": 2.7176669484361794e-06, "loss": 0.3132, "step": 11187, "task_loss": 0.622369110584259 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3047202229499817, "epoch": 9.46, "learning_rate": 2.713440405748098e-06, "loss": 0.3383, "step": 11188, "task_loss": 0.9664789438247681 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21926406025886536, "epoch": 9.46, "learning_rate": 2.7092138630600168e-06, "loss": 0.2802, "step": 11189, "task_loss": 1.006090521812439 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3817671835422516, "epoch": 9.46, "learning_rate": 2.7049873203719357e-06, "loss": 0.3014, "step": 11190, "task_loss": 1.0172269344329834 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4449467062950134, "epoch": 9.46, "learning_rate": 2.700760777683855e-06, "loss": 0.3543, "step": 11191, "task_loss": 0.3825158178806305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4292738735675812, "epoch": 9.46, "learning_rate": 2.696534234995774e-06, "loss": 0.4496, "step": 11192, "task_loss": 0.8003434538841248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24324774742126465, "epoch": 9.46, "learning_rate": 2.6923076923076928e-06, "loss": 0.3683, "step": 11193, "task_loss": 0.27464956045150757 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.394970566034317, "epoch": 9.46, "learning_rate": 2.6880811496196112e-06, "loss": 0.328, "step": 11194, "task_loss": 1.0642701387405396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4030565917491913, "epoch": 9.46, "learning_rate": 2.68385460693153e-06, "loss": 0.2957, "step": 11195, "task_loss": 1.3650078773498535 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17747089266777039, "epoch": 9.46, "learning_rate": 2.679628064243449e-06, "loss": 0.3279, "step": 11196, "task_loss": 0.3820453882217407 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4664587080478668, "epoch": 9.46, "learning_rate": 2.675401521555368e-06, "loss": 0.3248, "step": 11197, "task_loss": 0.48224613070487976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2361564338207245, "epoch": 9.47, "learning_rate": 2.671174978867287e-06, "loss": 0.2622, "step": 11198, "task_loss": 0.25270190834999084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3353045582771301, "epoch": 9.47, "learning_rate": 2.6669484361792057e-06, "loss": 0.318, "step": 11199, "task_loss": 0.4183235764503479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.262408584356308, "epoch": 9.47, "learning_rate": 2.6627218934911246e-06, "loss": 0.3502, "step": 11200, "task_loss": 0.6557337045669556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35873621702194214, "epoch": 9.47, "learning_rate": 2.6584953508030435e-06, "loss": 0.3308, "step": 11201, "task_loss": 0.6011322736740112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34558629989624023, "epoch": 9.47, "learning_rate": 2.654268808114962e-06, "loss": 0.3915, "step": 11202, "task_loss": 0.04240553081035614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4079582989215851, "epoch": 9.47, "learning_rate": 2.650042265426881e-06, "loss": 0.3423, "step": 11203, "task_loss": 0.8091160655021667 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3989418148994446, "epoch": 9.47, "learning_rate": 2.6458157227387997e-06, "loss": 0.3648, "step": 11204, "task_loss": 1.2211722135543823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22336331009864807, "epoch": 9.47, "learning_rate": 2.6415891800507186e-06, "loss": 0.3255, "step": 11205, "task_loss": 0.15671966969966888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22267872095108032, "epoch": 9.47, "learning_rate": 2.6373626373626375e-06, "loss": 0.2524, "step": 11206, "task_loss": 0.28931641578674316 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2927022874355316, "epoch": 9.47, "learning_rate": 2.6331360946745564e-06, "loss": 0.3034, "step": 11207, "task_loss": 0.9644102454185486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4131059944629669, "epoch": 9.47, "learning_rate": 2.6289095519864753e-06, "loss": 0.3307, "step": 11208, "task_loss": 0.6778172254562378 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3743073046207428, "epoch": 9.47, "learning_rate": 2.624683009298394e-06, "loss": 0.3455, "step": 11209, "task_loss": 0.5322973728179932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3584333658218384, "epoch": 9.48, "learning_rate": 2.6204564666103126e-06, "loss": 0.3204, "step": 11210, "task_loss": 1.0458983182907104 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20761582255363464, "epoch": 9.48, "learning_rate": 2.6162299239222315e-06, "loss": 0.3786, "step": 11211, "task_loss": 0.045559678226709366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36495184898376465, "epoch": 9.48, "learning_rate": 2.6120033812341504e-06, "loss": 0.3919, "step": 11212, "task_loss": 0.5553227663040161 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5865755677223206, "epoch": 9.48, "learning_rate": 2.6077768385460693e-06, "loss": 0.4001, "step": 11213, "task_loss": 0.8763982057571411 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48601412773132324, "epoch": 9.48, "learning_rate": 2.603550295857988e-06, "loss": 0.4276, "step": 11214, "task_loss": 0.4793255627155304 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.203613743185997, "epoch": 9.48, "learning_rate": 2.599323753169907e-06, "loss": 0.3254, "step": 11215, "task_loss": 0.2487459033727646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49443572759628296, "epoch": 9.48, "learning_rate": 2.595097210481826e-06, "loss": 0.4331, "step": 11216, "task_loss": 1.4443079233169556 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42447954416275024, "epoch": 9.48, "learning_rate": 2.590870667793745e-06, "loss": 0.3893, "step": 11217, "task_loss": 0.07932371646165848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3816227316856384, "epoch": 9.48, "learning_rate": 2.5866441251056634e-06, "loss": 0.3588, "step": 11218, "task_loss": 0.41712573170661926 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.586235523223877, "epoch": 9.48, "learning_rate": 2.5824175824175822e-06, "loss": 0.4347, "step": 11219, "task_loss": 0.8165228962898254 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5157811641693115, "epoch": 9.48, "learning_rate": 2.578191039729501e-06, "loss": 0.3712, "step": 11220, "task_loss": 0.5261442065238953 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6986539363861084, "epoch": 9.48, "learning_rate": 2.5739644970414204e-06, "loss": 0.5047, "step": 11221, "task_loss": 1.1466091871261597 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23622068762779236, "epoch": 9.49, "learning_rate": 2.5697379543533393e-06, "loss": 0.3423, "step": 11222, "task_loss": 0.3911498188972473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21634319424629211, "epoch": 9.49, "learning_rate": 2.5655114116652582e-06, "loss": 0.3416, "step": 11223, "task_loss": 0.07129547744989395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4544490575790405, "epoch": 9.49, "learning_rate": 2.561284868977177e-06, "loss": 0.4354, "step": 11224, "task_loss": 1.3349862098693848 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22212593257427216, "epoch": 9.49, "learning_rate": 2.5570583262890956e-06, "loss": 0.3817, "step": 11225, "task_loss": 0.2652691602706909 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37907928228378296, "epoch": 9.49, "learning_rate": 2.5528317836010145e-06, "loss": 0.3415, "step": 11226, "task_loss": 0.8815335035324097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39877671003341675, "epoch": 9.49, "learning_rate": 2.5486052409129334e-06, "loss": 0.3113, "step": 11227, "task_loss": 0.6111629009246826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3368489146232605, "epoch": 9.49, "learning_rate": 2.5443786982248523e-06, "loss": 0.2438, "step": 11228, "task_loss": 0.23493146896362305 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21482357382774353, "epoch": 9.49, "learning_rate": 2.540152155536771e-06, "loss": 0.3796, "step": 11229, "task_loss": 0.4439648389816284 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2962871193885803, "epoch": 9.49, "learning_rate": 2.53592561284869e-06, "loss": 0.4066, "step": 11230, "task_loss": 0.3677087426185608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23566138744354248, "epoch": 9.49, "learning_rate": 2.531699070160609e-06, "loss": 0.3729, "step": 11231, "task_loss": 0.14846713840961456 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3625061511993408, "epoch": 9.49, "learning_rate": 2.527472527472528e-06, "loss": 0.4053, "step": 11232, "task_loss": 0.7605141997337341 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47550538182258606, "epoch": 9.5, "learning_rate": 2.5232459847844463e-06, "loss": 0.3482, "step": 11233, "task_loss": 0.5712429881095886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5889968872070312, "epoch": 9.5, "learning_rate": 2.519019442096365e-06, "loss": 0.4103, "step": 11234, "task_loss": 0.6373184323310852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.306999146938324, "epoch": 9.5, "learning_rate": 2.514792899408284e-06, "loss": 0.2974, "step": 11235, "task_loss": 1.1315233707427979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47511088848114014, "epoch": 9.5, "learning_rate": 2.510566356720203e-06, "loss": 0.3017, "step": 11236, "task_loss": 0.5258234739303589 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23093372583389282, "epoch": 9.5, "learning_rate": 2.506339814032122e-06, "loss": 0.2415, "step": 11237, "task_loss": 0.1555791199207306 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49257826805114746, "epoch": 9.5, "learning_rate": 2.5021132713440408e-06, "loss": 0.2845, "step": 11238, "task_loss": 0.9504099488258362 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2972797453403473, "epoch": 9.5, "learning_rate": 2.4978867286559597e-06, "loss": 0.2845, "step": 11239, "task_loss": 0.7512521743774414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40362173318862915, "epoch": 9.5, "learning_rate": 2.4936601859678785e-06, "loss": 0.4071, "step": 11240, "task_loss": 0.7546878457069397 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.645882248878479, "epoch": 9.5, "learning_rate": 2.489433643279797e-06, "loss": 0.3855, "step": 11241, "task_loss": 0.7352324724197388 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26476460695266724, "epoch": 9.5, "learning_rate": 2.485207100591716e-06, "loss": 0.3099, "step": 11242, "task_loss": 0.38644272089004517 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48864278197288513, "epoch": 9.5, "learning_rate": 2.480980557903635e-06, "loss": 0.3817, "step": 11243, "task_loss": 1.1041029691696167 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3022564947605133, "epoch": 9.5, "learning_rate": 2.4767540152155537e-06, "loss": 0.2858, "step": 11244, "task_loss": 0.3329137861728668 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24785397946834564, "epoch": 9.51, "learning_rate": 2.4725274725274726e-06, "loss": 0.2989, "step": 11245, "task_loss": 0.32285284996032715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32519960403442383, "epoch": 9.51, "learning_rate": 2.4683009298393915e-06, "loss": 0.2943, "step": 11246, "task_loss": 0.24122153222560883 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2732008695602417, "epoch": 9.51, "learning_rate": 2.4640743871513104e-06, "loss": 0.3719, "step": 11247, "task_loss": 0.7145626544952393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6260085105895996, "epoch": 9.51, "learning_rate": 2.4598478444632293e-06, "loss": 0.3501, "step": 11248, "task_loss": 0.9510791301727295 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27141547203063965, "epoch": 9.51, "learning_rate": 2.4556213017751477e-06, "loss": 0.3061, "step": 11249, "task_loss": 0.13078004121780396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48791682720184326, "epoch": 9.51, "learning_rate": 2.4513947590870666e-06, "loss": 0.3337, "step": 11250, "task_loss": 0.456876277923584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3203643262386322, "epoch": 9.51, "learning_rate": 2.447168216398986e-06, "loss": 0.4589, "step": 11251, "task_loss": 0.9103769659996033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37787556648254395, "epoch": 9.51, "learning_rate": 2.442941673710905e-06, "loss": 0.3822, "step": 11252, "task_loss": 0.44761815667152405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34854191541671753, "epoch": 9.51, "learning_rate": 2.4387151310228237e-06, "loss": 0.3136, "step": 11253, "task_loss": 0.5957052707672119 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26073092222213745, "epoch": 9.51, "learning_rate": 2.4344885883347426e-06, "loss": 0.3113, "step": 11254, "task_loss": 0.5697309374809265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1848655641078949, "epoch": 9.51, "learning_rate": 2.430262045646661e-06, "loss": 0.3014, "step": 11255, "task_loss": 0.7200050950050354 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17946857213974, "epoch": 9.51, "learning_rate": 2.42603550295858e-06, "loss": 0.3607, "step": 11256, "task_loss": 0.3222772479057312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.247155100107193, "epoch": 9.52, "learning_rate": 2.421808960270499e-06, "loss": 0.3102, "step": 11257, "task_loss": 0.11182907968759537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3045231103897095, "epoch": 9.52, "learning_rate": 2.4175824175824177e-06, "loss": 0.4454, "step": 11258, "task_loss": 0.39615824818611145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3244926929473877, "epoch": 9.52, "learning_rate": 2.4133558748943366e-06, "loss": 0.3419, "step": 11259, "task_loss": 1.0087002515792847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1687776744365692, "epoch": 9.52, "learning_rate": 2.4091293322062555e-06, "loss": 0.2594, "step": 11260, "task_loss": 0.15563015639781952 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30812138319015503, "epoch": 9.52, "learning_rate": 2.4049027895181744e-06, "loss": 0.3729, "step": 11261, "task_loss": 0.5975702404975891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3309425711631775, "epoch": 9.52, "learning_rate": 2.4006762468300933e-06, "loss": 0.3504, "step": 11262, "task_loss": 0.28101032972335815 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2739301323890686, "epoch": 9.52, "learning_rate": 2.3964497041420118e-06, "loss": 0.3313, "step": 11263, "task_loss": 0.7410649061203003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2618538737297058, "epoch": 9.52, "learning_rate": 2.3922231614539307e-06, "loss": 0.3267, "step": 11264, "task_loss": 0.4619693160057068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30512380599975586, "epoch": 9.52, "learning_rate": 2.3879966187658496e-06, "loss": 0.3803, "step": 11265, "task_loss": 0.4550963044166565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6370394825935364, "epoch": 9.52, "learning_rate": 2.3837700760777685e-06, "loss": 0.4296, "step": 11266, "task_loss": 1.361843466758728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3523854613304138, "epoch": 9.52, "learning_rate": 2.3795435333896873e-06, "loss": 0.2812, "step": 11267, "task_loss": 0.23397944867610931 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34427791833877563, "epoch": 9.52, "learning_rate": 2.3753169907016062e-06, "loss": 0.2746, "step": 11268, "task_loss": 0.3872188329696655 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27786725759506226, "epoch": 9.53, "learning_rate": 2.371090448013525e-06, "loss": 0.2863, "step": 11269, "task_loss": 0.4174913763999939 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29992711544036865, "epoch": 9.53, "learning_rate": 2.366863905325444e-06, "loss": 0.2748, "step": 11270, "task_loss": 0.2696077525615692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30999353528022766, "epoch": 9.53, "learning_rate": 2.3626373626373625e-06, "loss": 0.3082, "step": 11271, "task_loss": 0.9265884160995483 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2943624258041382, "epoch": 9.53, "learning_rate": 2.3584108199492814e-06, "loss": 0.2983, "step": 11272, "task_loss": 0.631165087223053 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3417455852031708, "epoch": 9.53, "learning_rate": 2.3541842772612003e-06, "loss": 0.2506, "step": 11273, "task_loss": 0.9288751482963562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39671215415000916, "epoch": 9.53, "learning_rate": 2.349957734573119e-06, "loss": 0.3405, "step": 11274, "task_loss": 0.7747163772583008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3482685387134552, "epoch": 9.53, "learning_rate": 2.345731191885038e-06, "loss": 0.5447, "step": 11275, "task_loss": 0.7401619553565979 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.435600608587265, "epoch": 9.53, "learning_rate": 2.341504649196957e-06, "loss": 0.356, "step": 11276, "task_loss": 0.16817057132720947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2593138813972473, "epoch": 9.53, "learning_rate": 2.337278106508876e-06, "loss": 0.3154, "step": 11277, "task_loss": 0.4653940796852112 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3357090353965759, "epoch": 9.53, "learning_rate": 2.3330515638207947e-06, "loss": 0.3413, "step": 11278, "task_loss": 1.2139111757278442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.257800817489624, "epoch": 9.53, "learning_rate": 2.328825021132713e-06, "loss": 0.2732, "step": 11279, "task_loss": 0.5855380892753601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31374263763427734, "epoch": 9.53, "learning_rate": 2.324598478444632e-06, "loss": 0.3099, "step": 11280, "task_loss": 0.155843123793602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4167914390563965, "epoch": 9.54, "learning_rate": 2.3203719357565514e-06, "loss": 0.3793, "step": 11281, "task_loss": 1.9477704763412476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4643925428390503, "epoch": 9.54, "learning_rate": 2.3161453930684703e-06, "loss": 0.4343, "step": 11282, "task_loss": 0.116689532995224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3383294939994812, "epoch": 9.54, "learning_rate": 2.311918850380389e-06, "loss": 0.364, "step": 11283, "task_loss": 1.1284469366073608 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22616912424564362, "epoch": 9.54, "learning_rate": 2.307692307692308e-06, "loss": 0.3293, "step": 11284, "task_loss": 0.3918701410293579 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4249834716320038, "epoch": 9.54, "learning_rate": 2.303465765004227e-06, "loss": 0.4101, "step": 11285, "task_loss": 1.1767102479934692 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.334256649017334, "epoch": 9.54, "learning_rate": 2.2992392223161454e-06, "loss": 0.421, "step": 11286, "task_loss": 0.5789220929145813 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3614867925643921, "epoch": 9.54, "learning_rate": 2.2950126796280643e-06, "loss": 0.4794, "step": 11287, "task_loss": 0.5564362406730652 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6991974711418152, "epoch": 9.54, "learning_rate": 2.2907861369399832e-06, "loss": 0.4562, "step": 11288, "task_loss": 1.6002708673477173 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3912792503833771, "epoch": 9.54, "learning_rate": 2.286559594251902e-06, "loss": 0.4135, "step": 11289, "task_loss": 0.6200382113456726 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3674837350845337, "epoch": 9.54, "learning_rate": 2.282333051563821e-06, "loss": 0.3653, "step": 11290, "task_loss": 0.25311601161956787 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21538691222667694, "epoch": 9.54, "learning_rate": 2.27810650887574e-06, "loss": 0.3212, "step": 11291, "task_loss": 0.2719196677207947 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3568977117538452, "epoch": 9.54, "learning_rate": 2.2738799661876588e-06, "loss": 0.394, "step": 11292, "task_loss": 0.46265581250190735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3504661023616791, "epoch": 9.55, "learning_rate": 2.2696534234995777e-06, "loss": 0.2946, "step": 11293, "task_loss": 0.7050178647041321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22982819378376007, "epoch": 9.55, "learning_rate": 2.265426880811496e-06, "loss": 0.3634, "step": 11294, "task_loss": 0.3301703631877899 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35973668098449707, "epoch": 9.55, "learning_rate": 2.261200338123415e-06, "loss": 0.3474, "step": 11295, "task_loss": 0.8677868247032166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36139997839927673, "epoch": 9.55, "learning_rate": 2.256973795435334e-06, "loss": 0.379, "step": 11296, "task_loss": 0.9272249937057495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45224395394325256, "epoch": 9.55, "learning_rate": 2.252747252747253e-06, "loss": 0.3255, "step": 11297, "task_loss": 0.23578180372714996 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.261436402797699, "epoch": 9.55, "learning_rate": 2.2485207100591717e-06, "loss": 0.4009, "step": 11298, "task_loss": 1.0435104370117188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16719746589660645, "epoch": 9.55, "learning_rate": 2.2442941673710906e-06, "loss": 0.281, "step": 11299, "task_loss": 0.2571280002593994 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46074315905570984, "epoch": 9.55, "learning_rate": 2.2400676246830095e-06, "loss": 0.3586, "step": 11300, "task_loss": 0.6485046744346619 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29971182346343994, "epoch": 9.55, "learning_rate": 2.2358410819949284e-06, "loss": 0.3926, "step": 11301, "task_loss": 0.3752540647983551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2050643414258957, "epoch": 9.55, "learning_rate": 2.231614539306847e-06, "loss": 0.3566, "step": 11302, "task_loss": 0.6496607661247253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37660926580429077, "epoch": 9.55, "learning_rate": 2.2273879966187657e-06, "loss": 0.4935, "step": 11303, "task_loss": 0.7593499422073364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39057713747024536, "epoch": 9.56, "learning_rate": 2.2231614539306846e-06, "loss": 0.454, "step": 11304, "task_loss": 0.7025313377380371 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2646704614162445, "epoch": 9.56, "learning_rate": 2.2189349112426035e-06, "loss": 0.3395, "step": 11305, "task_loss": 0.5825679302215576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21712934970855713, "epoch": 9.56, "learning_rate": 2.2147083685545224e-06, "loss": 0.3976, "step": 11306, "task_loss": 0.44474098086357117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6768749952316284, "epoch": 9.56, "learning_rate": 2.2104818258664413e-06, "loss": 0.4045, "step": 11307, "task_loss": 0.489572674036026 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3789549767971039, "epoch": 9.56, "learning_rate": 2.20625528317836e-06, "loss": 0.3003, "step": 11308, "task_loss": 0.43537437915802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5831117630004883, "epoch": 9.56, "learning_rate": 2.202028740490279e-06, "loss": 0.4524, "step": 11309, "task_loss": 1.1619058847427368 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33674752712249756, "epoch": 9.56, "learning_rate": 2.197802197802198e-06, "loss": 0.335, "step": 11310, "task_loss": 0.5147825479507446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4123269021511078, "epoch": 9.56, "learning_rate": 2.193575655114117e-06, "loss": 0.4734, "step": 11311, "task_loss": 0.5781444907188416 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.345676988363266, "epoch": 9.56, "learning_rate": 2.1893491124260358e-06, "loss": 0.3586, "step": 11312, "task_loss": 0.6409375071525574 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2583978772163391, "epoch": 9.56, "learning_rate": 2.1851225697379547e-06, "loss": 0.3008, "step": 11313, "task_loss": 0.7200506925582886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28517550230026245, "epoch": 9.56, "learning_rate": 2.1808960270498736e-06, "loss": 0.3608, "step": 11314, "task_loss": 1.1121327877044678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5476095676422119, "epoch": 9.56, "learning_rate": 2.1766694843617924e-06, "loss": 0.519, "step": 11315, "task_loss": 0.5626525282859802 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2948870062828064, "epoch": 9.57, "learning_rate": 2.172442941673711e-06, "loss": 0.2944, "step": 11316, "task_loss": 0.10477277636528015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3104032874107361, "epoch": 9.57, "learning_rate": 2.16821639898563e-06, "loss": 0.2905, "step": 11317, "task_loss": 0.12572573125362396 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30485475063323975, "epoch": 9.57, "learning_rate": 2.1639898562975487e-06, "loss": 0.2992, "step": 11318, "task_loss": 0.7191717624664307 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48069918155670166, "epoch": 9.57, "learning_rate": 2.1597633136094676e-06, "loss": 0.3559, "step": 11319, "task_loss": 1.0460033416748047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24722042679786682, "epoch": 9.57, "learning_rate": 2.1555367709213865e-06, "loss": 0.3397, "step": 11320, "task_loss": 0.5406808853149414 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42122042179107666, "epoch": 9.57, "learning_rate": 2.1513102282333054e-06, "loss": 0.4235, "step": 11321, "task_loss": 0.492661714553833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4442176818847656, "epoch": 9.57, "learning_rate": 2.1470836855452243e-06, "loss": 0.3109, "step": 11322, "task_loss": 0.5840321183204651 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39093416929244995, "epoch": 9.57, "learning_rate": 2.142857142857143e-06, "loss": 0.3028, "step": 11323, "task_loss": 0.42109596729278564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3499734699726105, "epoch": 9.57, "learning_rate": 2.1386306001690616e-06, "loss": 0.4729, "step": 11324, "task_loss": 0.11740142107009888 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3950878381729126, "epoch": 9.57, "learning_rate": 2.1344040574809805e-06, "loss": 0.2758, "step": 11325, "task_loss": 0.5653151273727417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46087199449539185, "epoch": 9.57, "learning_rate": 2.1301775147928994e-06, "loss": 0.4125, "step": 11326, "task_loss": 0.5265005826950073 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3211498558521271, "epoch": 9.57, "learning_rate": 2.1259509721048183e-06, "loss": 0.3471, "step": 11327, "task_loss": 0.18026238679885864 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37499815225601196, "epoch": 9.58, "learning_rate": 2.121724429416737e-06, "loss": 0.4197, "step": 11328, "task_loss": 0.6027863621711731 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27312034368515015, "epoch": 9.58, "learning_rate": 2.117497886728656e-06, "loss": 0.2502, "step": 11329, "task_loss": 0.3918531537055969 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3683964014053345, "epoch": 9.58, "learning_rate": 2.113271344040575e-06, "loss": 0.3688, "step": 11330, "task_loss": 0.3580619990825653 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40159040689468384, "epoch": 9.58, "learning_rate": 2.109044801352494e-06, "loss": 0.3839, "step": 11331, "task_loss": 0.7435581684112549 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22182714939117432, "epoch": 9.58, "learning_rate": 2.1048182586644123e-06, "loss": 0.2732, "step": 11332, "task_loss": 0.2184019684791565 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2464931458234787, "epoch": 9.58, "learning_rate": 2.1005917159763312e-06, "loss": 0.3398, "step": 11333, "task_loss": 0.44217413663864136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25661230087280273, "epoch": 9.58, "learning_rate": 2.09636517328825e-06, "loss": 0.2607, "step": 11334, "task_loss": 0.12492145597934723 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2948495149612427, "epoch": 9.58, "learning_rate": 2.092138630600169e-06, "loss": 0.3299, "step": 11335, "task_loss": 0.5423761606216431 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5118556022644043, "epoch": 9.58, "learning_rate": 2.087912087912088e-06, "loss": 0.44, "step": 11336, "task_loss": 1.2480664253234863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1786273568868637, "epoch": 9.58, "learning_rate": 2.083685545224007e-06, "loss": 0.3445, "step": 11337, "task_loss": 0.2532385289669037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2957235276699066, "epoch": 9.58, "learning_rate": 2.0794590025359257e-06, "loss": 0.3472, "step": 11338, "task_loss": 1.193177580833435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32421043515205383, "epoch": 9.58, "learning_rate": 2.0752324598478446e-06, "loss": 0.3456, "step": 11339, "task_loss": 0.6723638772964478 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7096904516220093, "epoch": 9.59, "learning_rate": 2.0710059171597635e-06, "loss": 0.5919, "step": 11340, "task_loss": 1.3342965841293335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22693704068660736, "epoch": 9.59, "learning_rate": 2.0667793744716824e-06, "loss": 0.4114, "step": 11341, "task_loss": 0.17792680859565735 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2959129214286804, "epoch": 9.59, "learning_rate": 2.0625528317836012e-06, "loss": 0.4811, "step": 11342, "task_loss": 0.21198199689388275 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19234037399291992, "epoch": 9.59, "learning_rate": 2.05832628909552e-06, "loss": 0.3488, "step": 11343, "task_loss": 0.10808181017637253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27044084668159485, "epoch": 9.59, "learning_rate": 2.054099746407439e-06, "loss": 0.3654, "step": 11344, "task_loss": 1.0489733219146729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3765343725681305, "epoch": 9.59, "learning_rate": 2.049873203719358e-06, "loss": 0.3137, "step": 11345, "task_loss": 0.9310837388038635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38879409432411194, "epoch": 9.59, "learning_rate": 2.045646661031277e-06, "loss": 0.4871, "step": 11346, "task_loss": 1.2024022340774536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2554311454296112, "epoch": 9.59, "learning_rate": 2.0414201183431953e-06, "loss": 0.3434, "step": 11347, "task_loss": 0.5166749358177185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5394352078437805, "epoch": 9.59, "learning_rate": 2.037193575655114e-06, "loss": 0.3726, "step": 11348, "task_loss": 1.0180165767669678 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18100568652153015, "epoch": 9.59, "learning_rate": 2.032967032967033e-06, "loss": 0.3092, "step": 11349, "task_loss": 0.23527662456035614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35804274678230286, "epoch": 9.59, "learning_rate": 2.028740490278952e-06, "loss": 0.3171, "step": 11350, "task_loss": 1.0076273679733276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21199451386928558, "epoch": 9.59, "learning_rate": 2.024513947590871e-06, "loss": 0.3179, "step": 11351, "task_loss": 0.05430179089307785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39287057518959045, "epoch": 9.6, "learning_rate": 2.0202874049027897e-06, "loss": 0.4013, "step": 11352, "task_loss": 0.37391263246536255 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3621377646923065, "epoch": 9.6, "learning_rate": 2.0160608622147086e-06, "loss": 0.4331, "step": 11353, "task_loss": 0.31450536847114563 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2855967581272125, "epoch": 9.6, "learning_rate": 2.0118343195266275e-06, "loss": 0.3319, "step": 11354, "task_loss": 0.7733631134033203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3082992434501648, "epoch": 9.6, "learning_rate": 2.007607776838546e-06, "loss": 0.3981, "step": 11355, "task_loss": 1.32911217212677 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3519038259983063, "epoch": 9.6, "learning_rate": 2.003381234150465e-06, "loss": 0.3478, "step": 11356, "task_loss": 0.7286037802696228 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2534392774105072, "epoch": 9.6, "learning_rate": 1.9991546914623838e-06, "loss": 0.2666, "step": 11357, "task_loss": 0.22844351828098297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4133428931236267, "epoch": 9.6, "learning_rate": 1.9949281487743027e-06, "loss": 0.4222, "step": 11358, "task_loss": 1.2354899644851685 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42720797657966614, "epoch": 9.6, "learning_rate": 1.9907016060862216e-06, "loss": 0.3185, "step": 11359, "task_loss": 0.29582083225250244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30216288566589355, "epoch": 9.6, "learning_rate": 1.9864750633981404e-06, "loss": 0.3569, "step": 11360, "task_loss": 0.7684584856033325 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19312812387943268, "epoch": 9.6, "learning_rate": 1.9822485207100593e-06, "loss": 0.2425, "step": 11361, "task_loss": 0.13954511284828186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24762722849845886, "epoch": 9.6, "learning_rate": 1.9780219780219782e-06, "loss": 0.3128, "step": 11362, "task_loss": 1.152559518814087 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2648628056049347, "epoch": 9.6, "learning_rate": 1.9737954353338967e-06, "loss": 0.3865, "step": 11363, "task_loss": 0.4906817674636841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5737491250038147, "epoch": 9.61, "learning_rate": 1.9695688926458156e-06, "loss": 0.3822, "step": 11364, "task_loss": 0.632621705532074 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3169782757759094, "epoch": 9.61, "learning_rate": 1.9653423499577345e-06, "loss": 0.3805, "step": 11365, "task_loss": 0.5344615578651428 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2742988169193268, "epoch": 9.61, "learning_rate": 1.9611158072696534e-06, "loss": 0.3415, "step": 11366, "task_loss": 0.5961724519729614 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24963140487670898, "epoch": 9.61, "learning_rate": 1.9568892645815723e-06, "loss": 0.2595, "step": 11367, "task_loss": 0.23455478250980377 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23349100351333618, "epoch": 9.61, "learning_rate": 1.952662721893491e-06, "loss": 0.3106, "step": 11368, "task_loss": 0.4577588438987732 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2879914939403534, "epoch": 9.61, "learning_rate": 1.94843617920541e-06, "loss": 0.3557, "step": 11369, "task_loss": 0.9814213514328003 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19712060689926147, "epoch": 9.61, "learning_rate": 1.944209636517329e-06, "loss": 0.3086, "step": 11370, "task_loss": 0.039380405098199844 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2103244960308075, "epoch": 9.61, "learning_rate": 1.939983093829248e-06, "loss": 0.3082, "step": 11371, "task_loss": 0.018880458548665047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5503775477409363, "epoch": 9.61, "learning_rate": 1.9357565511411667e-06, "loss": 0.5378, "step": 11372, "task_loss": 0.29670798778533936 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21757550537586212, "epoch": 9.61, "learning_rate": 1.9315300084530856e-06, "loss": 0.277, "step": 11373, "task_loss": 0.5100223422050476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34573304653167725, "epoch": 9.61, "learning_rate": 1.9273034657650045e-06, "loss": 0.3425, "step": 11374, "task_loss": 0.46197354793548584 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3948034644126892, "epoch": 9.61, "learning_rate": 1.9230769230769234e-06, "loss": 0.4335, "step": 11375, "task_loss": 0.960390031337738 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47717419266700745, "epoch": 9.62, "learning_rate": 1.9188503803888423e-06, "loss": 0.3592, "step": 11376, "task_loss": 0.6879274845123291 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21797139942646027, "epoch": 9.62, "learning_rate": 1.9146238377007608e-06, "loss": 0.2583, "step": 11377, "task_loss": 0.10829822719097137 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21399691700935364, "epoch": 9.62, "learning_rate": 1.9103972950126796e-06, "loss": 0.2461, "step": 11378, "task_loss": 0.18880638480186462 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28747397661209106, "epoch": 9.62, "learning_rate": 1.9061707523245985e-06, "loss": 0.29, "step": 11379, "task_loss": 0.24079172313213348 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30355891585350037, "epoch": 9.62, "learning_rate": 1.9019442096365174e-06, "loss": 0.3047, "step": 11380, "task_loss": 0.32455843687057495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.452084481716156, "epoch": 9.62, "learning_rate": 1.8977176669484363e-06, "loss": 0.3538, "step": 11381, "task_loss": 0.7939251065254211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3114120364189148, "epoch": 9.62, "learning_rate": 1.8934911242603552e-06, "loss": 0.3864, "step": 11382, "task_loss": 0.4872196912765503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.423596054315567, "epoch": 9.62, "learning_rate": 1.889264581572274e-06, "loss": 0.4635, "step": 11383, "task_loss": 1.2600687742233276 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5550791621208191, "epoch": 9.62, "learning_rate": 1.8850380388841928e-06, "loss": 0.4827, "step": 11384, "task_loss": 0.28835129737854004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42487403750419617, "epoch": 9.62, "learning_rate": 1.8808114961961117e-06, "loss": 0.5077, "step": 11385, "task_loss": 0.8663027882575989 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3729102611541748, "epoch": 9.62, "learning_rate": 1.8765849535080306e-06, "loss": 0.4624, "step": 11386, "task_loss": 0.9108969569206238 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3118417263031006, "epoch": 9.63, "learning_rate": 1.8723584108199492e-06, "loss": 0.3623, "step": 11387, "task_loss": 0.9128273129463196 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35142892599105835, "epoch": 9.63, "learning_rate": 1.8681318681318681e-06, "loss": 0.2891, "step": 11388, "task_loss": 0.3188212215900421 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49902379512786865, "epoch": 9.63, "learning_rate": 1.863905325443787e-06, "loss": 0.36, "step": 11389, "task_loss": 0.92162024974823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3398239314556122, "epoch": 9.63, "learning_rate": 1.859678782755706e-06, "loss": 0.3131, "step": 11390, "task_loss": 0.29287195205688477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.67226243019104, "epoch": 9.63, "learning_rate": 1.8554522400676246e-06, "loss": 0.4469, "step": 11391, "task_loss": 1.0422295331954956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27652254700660706, "epoch": 9.63, "learning_rate": 1.8512256973795435e-06, "loss": 0.3273, "step": 11392, "task_loss": 0.2765454649925232 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2788563370704651, "epoch": 9.63, "learning_rate": 1.8469991546914624e-06, "loss": 0.4269, "step": 11393, "task_loss": 0.4959866404533386 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18035463988780975, "epoch": 9.63, "learning_rate": 1.8427726120033813e-06, "loss": 0.2439, "step": 11394, "task_loss": 0.3229225277900696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37946566939353943, "epoch": 9.63, "learning_rate": 1.8385460693153e-06, "loss": 0.3984, "step": 11395, "task_loss": 0.28168636560440063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22433197498321533, "epoch": 9.63, "learning_rate": 1.8343195266272188e-06, "loss": 0.3312, "step": 11396, "task_loss": 0.0423276424407959 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3201439380645752, "epoch": 9.63, "learning_rate": 1.8300929839391377e-06, "loss": 0.3414, "step": 11397, "task_loss": 0.2736188769340515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18343813717365265, "epoch": 9.63, "learning_rate": 1.8258664412510566e-06, "loss": 0.2783, "step": 11398, "task_loss": 0.8561438918113708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3910684287548065, "epoch": 9.64, "learning_rate": 1.8216398985629753e-06, "loss": 0.3657, "step": 11399, "task_loss": 0.3508215844631195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2434781938791275, "epoch": 9.64, "learning_rate": 1.8174133558748946e-06, "loss": 0.4263, "step": 11400, "task_loss": 0.5328945517539978 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2664779722690582, "epoch": 9.64, "learning_rate": 1.8131868131868135e-06, "loss": 0.2873, "step": 11401, "task_loss": 0.18951082229614258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3395950198173523, "epoch": 9.64, "learning_rate": 1.8089602704987322e-06, "loss": 0.3927, "step": 11402, "task_loss": 0.22719445824623108 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28845998644828796, "epoch": 9.64, "learning_rate": 1.804733727810651e-06, "loss": 0.3296, "step": 11403, "task_loss": 0.6665111184120178 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3056619167327881, "epoch": 9.64, "learning_rate": 1.80050718512257e-06, "loss": 0.4058, "step": 11404, "task_loss": 0.7225939631462097 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1704697459936142, "epoch": 9.64, "learning_rate": 1.7962806424344889e-06, "loss": 0.3247, "step": 11405, "task_loss": 0.7161139249801636 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.235569030046463, "epoch": 9.64, "learning_rate": 1.7920540997464076e-06, "loss": 0.3337, "step": 11406, "task_loss": 0.21453885734081268 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1402040719985962, "epoch": 9.64, "learning_rate": 1.7878275570583264e-06, "loss": 0.3531, "step": 11407, "task_loss": 0.4320604205131531 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2700716257095337, "epoch": 9.64, "learning_rate": 1.7836010143702453e-06, "loss": 0.4085, "step": 11408, "task_loss": 0.4586601257324219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26214319467544556, "epoch": 9.64, "learning_rate": 1.7793744716821642e-06, "loss": 0.3216, "step": 11409, "task_loss": 0.22184021770954132 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3785628080368042, "epoch": 9.64, "learning_rate": 1.775147928994083e-06, "loss": 0.2913, "step": 11410, "task_loss": 0.23283220827579498 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2353890985250473, "epoch": 9.65, "learning_rate": 1.7709213863060018e-06, "loss": 0.3183, "step": 11411, "task_loss": 0.3049878180027008 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18935465812683105, "epoch": 9.65, "learning_rate": 1.7666948436179207e-06, "loss": 0.3174, "step": 11412, "task_loss": 0.7596449851989746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38802996277809143, "epoch": 9.65, "learning_rate": 1.7624683009298396e-06, "loss": 0.3615, "step": 11413, "task_loss": 0.6841916441917419 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4733309745788574, "epoch": 9.65, "learning_rate": 1.7582417582417583e-06, "loss": 0.3877, "step": 11414, "task_loss": 0.3482346534729004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2754071056842804, "epoch": 9.65, "learning_rate": 1.7540152155536772e-06, "loss": 0.3411, "step": 11415, "task_loss": 1.4552600383758545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3559128940105438, "epoch": 9.65, "learning_rate": 1.749788672865596e-06, "loss": 0.396, "step": 11416, "task_loss": 0.563217043876648 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2073933482170105, "epoch": 9.65, "learning_rate": 1.745562130177515e-06, "loss": 0.3898, "step": 11417, "task_loss": 0.4485001862049103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4541964530944824, "epoch": 9.65, "learning_rate": 1.7413355874894336e-06, "loss": 0.3379, "step": 11418, "task_loss": 0.17898324131965637 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3260287642478943, "epoch": 9.65, "learning_rate": 1.7371090448013525e-06, "loss": 0.4064, "step": 11419, "task_loss": 0.9959813356399536 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4553569555282593, "epoch": 9.65, "learning_rate": 1.7328825021132714e-06, "loss": 0.3656, "step": 11420, "task_loss": 0.9632174968719482 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37856072187423706, "epoch": 9.65, "learning_rate": 1.7286559594251903e-06, "loss": 0.3042, "step": 11421, "task_loss": 0.4925137758255005 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3226199746131897, "epoch": 9.65, "learning_rate": 1.724429416737109e-06, "loss": 0.338, "step": 11422, "task_loss": 0.19618257880210876 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40906471014022827, "epoch": 9.66, "learning_rate": 1.7202028740490279e-06, "loss": 0.3535, "step": 11423, "task_loss": 0.9799938201904297 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.288569837808609, "epoch": 9.66, "learning_rate": 1.7159763313609468e-06, "loss": 0.3266, "step": 11424, "task_loss": 0.462087482213974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33994609117507935, "epoch": 9.66, "learning_rate": 1.7117497886728656e-06, "loss": 0.4332, "step": 11425, "task_loss": 1.495188593864441 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2971399426460266, "epoch": 9.66, "learning_rate": 1.7075232459847843e-06, "loss": 0.3769, "step": 11426, "task_loss": 0.6118338704109192 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15497329831123352, "epoch": 9.66, "learning_rate": 1.7032967032967032e-06, "loss": 0.3367, "step": 11427, "task_loss": 0.8847630620002747 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3680981993675232, "epoch": 9.66, "learning_rate": 1.6990701606086221e-06, "loss": 0.3761, "step": 11428, "task_loss": 0.3482116460800171 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3056458830833435, "epoch": 9.66, "learning_rate": 1.6948436179205412e-06, "loss": 0.3412, "step": 11429, "task_loss": 0.7409135103225708 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23351219296455383, "epoch": 9.66, "learning_rate": 1.69061707523246e-06, "loss": 0.337, "step": 11430, "task_loss": 0.1401732861995697 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3785826563835144, "epoch": 9.66, "learning_rate": 1.686390532544379e-06, "loss": 0.3072, "step": 11431, "task_loss": 0.41874128580093384 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41313013434410095, "epoch": 9.66, "learning_rate": 1.6821639898562977e-06, "loss": 0.3431, "step": 11432, "task_loss": 0.8204946517944336 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.417153537273407, "epoch": 9.66, "learning_rate": 1.6779374471682166e-06, "loss": 0.3799, "step": 11433, "task_loss": 0.6660819053649902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1994740515947342, "epoch": 9.66, "learning_rate": 1.6737109044801355e-06, "loss": 0.3536, "step": 11434, "task_loss": 0.5244724154472351 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23480790853500366, "epoch": 9.67, "learning_rate": 1.6694843617920543e-06, "loss": 0.3074, "step": 11435, "task_loss": 0.03405332192778587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.224386528134346, "epoch": 9.67, "learning_rate": 1.665257819103973e-06, "loss": 0.4569, "step": 11436, "task_loss": 0.3570059537887573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.512499988079071, "epoch": 9.67, "learning_rate": 1.661031276415892e-06, "loss": 0.3387, "step": 11437, "task_loss": 1.376373291015625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3143555223941803, "epoch": 9.67, "learning_rate": 1.6568047337278108e-06, "loss": 0.3209, "step": 11438, "task_loss": 0.8242653012275696 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4584347605705261, "epoch": 9.67, "learning_rate": 1.6525781910397297e-06, "loss": 0.3966, "step": 11439, "task_loss": 1.094449520111084 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2787601053714752, "epoch": 9.67, "learning_rate": 1.6483516483516484e-06, "loss": 0.3488, "step": 11440, "task_loss": 0.29106733202934265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26899731159210205, "epoch": 9.67, "learning_rate": 1.6441251056635673e-06, "loss": 0.3397, "step": 11441, "task_loss": 0.5184072256088257 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27530956268310547, "epoch": 9.67, "learning_rate": 1.6398985629754862e-06, "loss": 0.395, "step": 11442, "task_loss": 0.6371501684188843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43390512466430664, "epoch": 9.67, "learning_rate": 1.635672020287405e-06, "loss": 0.3593, "step": 11443, "task_loss": 0.7042547464370728 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4595538377761841, "epoch": 9.67, "learning_rate": 1.6314454775993237e-06, "loss": 0.4018, "step": 11444, "task_loss": 1.0405006408691406 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24976295232772827, "epoch": 9.67, "learning_rate": 1.6272189349112426e-06, "loss": 0.3236, "step": 11445, "task_loss": 0.5751939415931702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26376330852508545, "epoch": 9.67, "learning_rate": 1.6229923922231615e-06, "loss": 0.3831, "step": 11446, "task_loss": 0.9888198375701904 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3995562493801117, "epoch": 9.68, "learning_rate": 1.6187658495350804e-06, "loss": 0.4051, "step": 11447, "task_loss": 0.503148078918457 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2619239091873169, "epoch": 9.68, "learning_rate": 1.614539306846999e-06, "loss": 0.3027, "step": 11448, "task_loss": 0.42259958386421204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2721227705478668, "epoch": 9.68, "learning_rate": 1.610312764158918e-06, "loss": 0.3326, "step": 11449, "task_loss": 0.402411550283432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21126922965049744, "epoch": 9.68, "learning_rate": 1.6060862214708369e-06, "loss": 0.325, "step": 11450, "task_loss": 0.44474369287490845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2600010335445404, "epoch": 9.68, "learning_rate": 1.6018596787827558e-06, "loss": 0.3593, "step": 11451, "task_loss": 0.6624702215194702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20123682916164398, "epoch": 9.68, "learning_rate": 1.5976331360946744e-06, "loss": 0.3656, "step": 11452, "task_loss": 0.6612902879714966 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.15425634384155273, "epoch": 9.68, "learning_rate": 1.5934065934065933e-06, "loss": 0.4062, "step": 11453, "task_loss": 1.0239975452423096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38849514722824097, "epoch": 9.68, "learning_rate": 1.5891800507185122e-06, "loss": 0.3113, "step": 11454, "task_loss": 0.1842092126607895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2801484167575836, "epoch": 9.68, "learning_rate": 1.5849535080304311e-06, "loss": 0.3237, "step": 11455, "task_loss": 0.7476338744163513 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16617603600025177, "epoch": 9.68, "learning_rate": 1.5807269653423498e-06, "loss": 0.3585, "step": 11456, "task_loss": 0.1819252073764801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3295549750328064, "epoch": 9.68, "learning_rate": 1.5765004226542687e-06, "loss": 0.411, "step": 11457, "task_loss": 0.12821581959724426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20386052131652832, "epoch": 9.69, "learning_rate": 1.5722738799661876e-06, "loss": 0.3095, "step": 11458, "task_loss": 0.04509374499320984 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3831862807273865, "epoch": 9.69, "learning_rate": 1.5680473372781067e-06, "loss": 0.2851, "step": 11459, "task_loss": 0.5460574626922607 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3222775459289551, "epoch": 9.69, "learning_rate": 1.5638207945900256e-06, "loss": 0.3737, "step": 11460, "task_loss": 0.4812104105949402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2854434549808502, "epoch": 9.69, "learning_rate": 1.5595942519019443e-06, "loss": 0.2827, "step": 11461, "task_loss": 0.6966005563735962 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2175339162349701, "epoch": 9.69, "learning_rate": 1.5553677092138632e-06, "loss": 0.3884, "step": 11462, "task_loss": 0.4979347288608551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24974189698696136, "epoch": 9.69, "learning_rate": 1.5511411665257818e-06, "loss": 0.2975, "step": 11463, "task_loss": 0.14532750844955444 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5196920037269592, "epoch": 9.69, "learning_rate": 1.5469146238377007e-06, "loss": 0.438, "step": 11464, "task_loss": 1.0485488176345825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2503054440021515, "epoch": 9.69, "learning_rate": 1.5426880811496196e-06, "loss": 0.312, "step": 11465, "task_loss": 0.7960480451583862 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.8321512937545776, "epoch": 9.69, "learning_rate": 1.5384615384615387e-06, "loss": 0.5434, "step": 11466, "task_loss": 0.5021150708198547 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29590654373168945, "epoch": 9.69, "learning_rate": 1.5342349957734574e-06, "loss": 0.3848, "step": 11467, "task_loss": 0.4462801218032837 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4700407385826111, "epoch": 9.69, "learning_rate": 1.5300084530853763e-06, "loss": 0.4554, "step": 11468, "task_loss": 0.9669203758239746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.435629278421402, "epoch": 9.69, "learning_rate": 1.5257819103972952e-06, "loss": 0.3604, "step": 11469, "task_loss": 1.2691365480422974 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6612328290939331, "epoch": 9.7, "learning_rate": 1.521555367709214e-06, "loss": 0.4595, "step": 11470, "task_loss": 0.7653945684432983 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4318428635597229, "epoch": 9.7, "learning_rate": 1.5173288250211328e-06, "loss": 0.4243, "step": 11471, "task_loss": 0.9471795558929443 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5161401629447937, "epoch": 9.7, "learning_rate": 1.5131022823330516e-06, "loss": 0.3618, "step": 11472, "task_loss": 0.3132745325565338 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4227718412876129, "epoch": 9.7, "learning_rate": 1.5088757396449705e-06, "loss": 0.3155, "step": 11473, "task_loss": 0.30955770611763 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20657232403755188, "epoch": 9.7, "learning_rate": 1.5046491969568894e-06, "loss": 0.2378, "step": 11474, "task_loss": 0.31897175312042236 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5543172359466553, "epoch": 9.7, "learning_rate": 1.5004226542688081e-06, "loss": 0.4827, "step": 11475, "task_loss": 0.2159649133682251 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44140732288360596, "epoch": 9.7, "learning_rate": 1.496196111580727e-06, "loss": 0.3811, "step": 11476, "task_loss": 0.4367848038673401 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.284241259098053, "epoch": 9.7, "learning_rate": 1.4919695688926459e-06, "loss": 0.373, "step": 11477, "task_loss": 0.2689046859741211 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3105393052101135, "epoch": 9.7, "learning_rate": 1.4877430262045648e-06, "loss": 0.3292, "step": 11478, "task_loss": 0.14010834693908691 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40796399116516113, "epoch": 9.7, "learning_rate": 1.4835164835164835e-06, "loss": 0.3892, "step": 11479, "task_loss": 0.40134066343307495 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5347166657447815, "epoch": 9.7, "learning_rate": 1.4792899408284024e-06, "loss": 0.3992, "step": 11480, "task_loss": 1.2211244106292725 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3464749753475189, "epoch": 9.7, "learning_rate": 1.4750633981403215e-06, "loss": 0.4054, "step": 11481, "task_loss": 1.366295576095581 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2973012328147888, "epoch": 9.71, "learning_rate": 1.4708368554522401e-06, "loss": 0.4435, "step": 11482, "task_loss": 0.32228514552116394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25689366459846497, "epoch": 9.71, "learning_rate": 1.466610312764159e-06, "loss": 0.3382, "step": 11483, "task_loss": 0.8613317608833313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31057053804397583, "epoch": 9.71, "learning_rate": 1.462383770076078e-06, "loss": 0.3769, "step": 11484, "task_loss": 0.31447872519493103 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37916329503059387, "epoch": 9.71, "learning_rate": 1.4581572273879968e-06, "loss": 0.3606, "step": 11485, "task_loss": 0.511221170425415 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2058028280735016, "epoch": 9.71, "learning_rate": 1.4539306846999155e-06, "loss": 0.275, "step": 11486, "task_loss": 0.724391758441925 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16803047060966492, "epoch": 9.71, "learning_rate": 1.4497041420118344e-06, "loss": 0.3363, "step": 11487, "task_loss": 0.06658004224300385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4559987783432007, "epoch": 9.71, "learning_rate": 1.4454775993237533e-06, "loss": 0.3721, "step": 11488, "task_loss": 0.9642744064331055 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3305496573448181, "epoch": 9.71, "learning_rate": 1.4412510566356722e-06, "loss": 0.391, "step": 11489, "task_loss": 0.30208584666252136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20133759081363678, "epoch": 9.71, "learning_rate": 1.4370245139475908e-06, "loss": 0.3551, "step": 11490, "task_loss": 0.6635864973068237 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5491741299629211, "epoch": 9.71, "learning_rate": 1.4327979712595097e-06, "loss": 0.5044, "step": 11491, "task_loss": 0.6537925004959106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27279967069625854, "epoch": 9.71, "learning_rate": 1.4285714285714286e-06, "loss": 0.2547, "step": 11492, "task_loss": 0.2347949892282486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25307825207710266, "epoch": 9.71, "learning_rate": 1.4243448858833475e-06, "loss": 0.3161, "step": 11493, "task_loss": 0.0667300820350647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5817744731903076, "epoch": 9.72, "learning_rate": 1.4201183431952662e-06, "loss": 0.4563, "step": 11494, "task_loss": 1.0616064071655273 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3033926486968994, "epoch": 9.72, "learning_rate": 1.415891800507185e-06, "loss": 0.3704, "step": 11495, "task_loss": 1.3076547384262085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39327317476272583, "epoch": 9.72, "learning_rate": 1.4116652578191042e-06, "loss": 0.3331, "step": 11496, "task_loss": 0.31131288409233093 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20547592639923096, "epoch": 9.72, "learning_rate": 1.4074387151310229e-06, "loss": 0.2665, "step": 11497, "task_loss": 0.43815693259239197 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16236069798469543, "epoch": 9.72, "learning_rate": 1.4032121724429418e-06, "loss": 0.4039, "step": 11498, "task_loss": 0.5727542638778687 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.46614569425582886, "epoch": 9.72, "learning_rate": 1.3989856297548607e-06, "loss": 0.4063, "step": 11499, "task_loss": 0.8678928017616272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22096040844917297, "epoch": 9.72, "learning_rate": 1.3947590870667795e-06, "loss": 0.312, "step": 11500, "task_loss": 0.6331918239593506 }, { "epoch": 9.72, "eval_accuracy": 0.9171881188118812, "eval_loss": 0.24021731317043304, "eval_runtime": 226.013, "eval_samples_per_second": 111.719, "eval_steps_per_second": 0.876, "step": 11500 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42182546854019165, "epoch": 9.72, "learning_rate": 1.3905325443786982e-06, "loss": 0.3711, "step": 11501, "task_loss": 0.6707150936126709 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2353561818599701, "epoch": 9.72, "learning_rate": 1.3863060016906171e-06, "loss": 0.3023, "step": 11502, "task_loss": 0.38667890429496765 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2755362391471863, "epoch": 9.72, "learning_rate": 1.382079459002536e-06, "loss": 0.3937, "step": 11503, "task_loss": 0.7184953689575195 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35116761922836304, "epoch": 9.72, "learning_rate": 1.377852916314455e-06, "loss": 0.3963, "step": 11504, "task_loss": 0.492603063583374 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3725813031196594, "epoch": 9.72, "learning_rate": 1.3736263736263736e-06, "loss": 0.2927, "step": 11505, "task_loss": 0.6261778473854065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18086005747318268, "epoch": 9.73, "learning_rate": 1.3693998309382925e-06, "loss": 0.2974, "step": 11506, "task_loss": 0.74566650390625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2075197696685791, "epoch": 9.73, "learning_rate": 1.3651732882502114e-06, "loss": 0.2489, "step": 11507, "task_loss": 0.2277413159608841 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4005768299102783, "epoch": 9.73, "learning_rate": 1.3609467455621303e-06, "loss": 0.3565, "step": 11508, "task_loss": 1.1678518056869507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29778093099594116, "epoch": 9.73, "learning_rate": 1.356720202874049e-06, "loss": 0.3013, "step": 11509, "task_loss": 0.4046175479888916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3814990222454071, "epoch": 9.73, "learning_rate": 1.3524936601859678e-06, "loss": 0.42, "step": 11510, "task_loss": 0.5970731973648071 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5035764575004578, "epoch": 9.73, "learning_rate": 1.348267117497887e-06, "loss": 0.4031, "step": 11511, "task_loss": 1.3242605924606323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6035146713256836, "epoch": 9.73, "learning_rate": 1.3440405748098056e-06, "loss": 0.4419, "step": 11512, "task_loss": 0.5378085970878601 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38605213165283203, "epoch": 9.73, "learning_rate": 1.3398140321217245e-06, "loss": 0.4696, "step": 11513, "task_loss": 0.40572240948677063 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33871760964393616, "epoch": 9.73, "learning_rate": 1.3355874894336434e-06, "loss": 0.4162, "step": 11514, "task_loss": 0.8985888361930847 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2023114413022995, "epoch": 9.73, "learning_rate": 1.3313609467455623e-06, "loss": 0.2346, "step": 11515, "task_loss": 0.6903096437454224 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28632354736328125, "epoch": 9.73, "learning_rate": 1.327134404057481e-06, "loss": 0.2907, "step": 11516, "task_loss": 0.5053189396858215 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2577715218067169, "epoch": 9.73, "learning_rate": 1.3229078613693999e-06, "loss": 0.3585, "step": 11517, "task_loss": 0.10421779751777649 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49105221033096313, "epoch": 9.74, "learning_rate": 1.3186813186813187e-06, "loss": 0.3824, "step": 11518, "task_loss": 0.41296112537384033 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21086427569389343, "epoch": 9.74, "learning_rate": 1.3144547759932376e-06, "loss": 0.3201, "step": 11519, "task_loss": 0.398397833108902 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21979159116744995, "epoch": 9.74, "learning_rate": 1.3102282333051563e-06, "loss": 0.3003, "step": 11520, "task_loss": 0.052946317940950394 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3397313356399536, "epoch": 9.74, "learning_rate": 1.3060016906170752e-06, "loss": 0.4624, "step": 11521, "task_loss": 0.9207710027694702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5669938325881958, "epoch": 9.74, "learning_rate": 1.301775147928994e-06, "loss": 0.3863, "step": 11522, "task_loss": 0.9419997334480286 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18719340860843658, "epoch": 9.74, "learning_rate": 1.297548605240913e-06, "loss": 0.3273, "step": 11523, "task_loss": 0.4451925456523895 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3807867765426636, "epoch": 9.74, "learning_rate": 1.2933220625528317e-06, "loss": 0.3498, "step": 11524, "task_loss": 0.24388277530670166 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5017861723899841, "epoch": 9.74, "learning_rate": 1.2890955198647506e-06, "loss": 0.3739, "step": 11525, "task_loss": 0.5985338687896729 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4993566572666168, "epoch": 9.74, "learning_rate": 1.2848689771766697e-06, "loss": 0.3549, "step": 11526, "task_loss": 0.7203245162963867 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18379151821136475, "epoch": 9.74, "learning_rate": 1.2806424344885886e-06, "loss": 0.2299, "step": 11527, "task_loss": 0.12222649157047272 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28931543231010437, "epoch": 9.74, "learning_rate": 1.2764158918005072e-06, "loss": 0.3222, "step": 11528, "task_loss": 0.6729823350906372 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3812021017074585, "epoch": 9.75, "learning_rate": 1.2721893491124261e-06, "loss": 0.285, "step": 11529, "task_loss": 0.5738945007324219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2791770398616791, "epoch": 9.75, "learning_rate": 1.267962806424345e-06, "loss": 0.2271, "step": 11530, "task_loss": 0.3496460020542145 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23038524389266968, "epoch": 9.75, "learning_rate": 1.263736263736264e-06, "loss": 0.3871, "step": 11531, "task_loss": 0.3085528016090393 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3166860342025757, "epoch": 9.75, "learning_rate": 1.2595097210481826e-06, "loss": 0.2987, "step": 11532, "task_loss": 0.5186700820922852 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31599241495132446, "epoch": 9.75, "learning_rate": 1.2552831783601015e-06, "loss": 0.276, "step": 11533, "task_loss": 0.32217615842819214 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1491033434867859, "epoch": 9.75, "learning_rate": 1.2510566356720204e-06, "loss": 0.2725, "step": 11534, "task_loss": 0.1769881248474121 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38832467794418335, "epoch": 9.75, "learning_rate": 1.2468300929839393e-06, "loss": 0.3046, "step": 11535, "task_loss": 1.1538777351379395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3228498697280884, "epoch": 9.75, "learning_rate": 1.242603550295858e-06, "loss": 0.3606, "step": 11536, "task_loss": 0.46882614493370056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23697489500045776, "epoch": 9.75, "learning_rate": 1.2383770076077768e-06, "loss": 0.293, "step": 11537, "task_loss": 0.5640213489532471 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6739101409912109, "epoch": 9.75, "learning_rate": 1.2341504649196957e-06, "loss": 0.4538, "step": 11538, "task_loss": 0.6842974424362183 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2328580766916275, "epoch": 9.75, "learning_rate": 1.2299239222316146e-06, "loss": 0.3361, "step": 11539, "task_loss": 0.2995491623878479 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43693795800209045, "epoch": 9.75, "learning_rate": 1.2256973795435333e-06, "loss": 0.4444, "step": 11540, "task_loss": 1.7200641632080078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16191980242729187, "epoch": 9.76, "learning_rate": 1.2214708368554524e-06, "loss": 0.3562, "step": 11541, "task_loss": 0.2957480847835541 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3020510971546173, "epoch": 9.76, "learning_rate": 1.2172442941673713e-06, "loss": 0.427, "step": 11542, "task_loss": 0.89032381772995 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4471988081932068, "epoch": 9.76, "learning_rate": 1.21301775147929e-06, "loss": 0.381, "step": 11543, "task_loss": 0.6252469420433044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2768203020095825, "epoch": 9.76, "learning_rate": 1.2087912087912089e-06, "loss": 0.2695, "step": 11544, "task_loss": 0.38265687227249146 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21978306770324707, "epoch": 9.76, "learning_rate": 1.2045646661031278e-06, "loss": 0.2546, "step": 11545, "task_loss": 0.4047967493534088 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33415138721466064, "epoch": 9.76, "learning_rate": 1.2003381234150467e-06, "loss": 0.4065, "step": 11546, "task_loss": 0.6916629076004028 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5713497996330261, "epoch": 9.76, "learning_rate": 1.1961115807269653e-06, "loss": 0.4325, "step": 11547, "task_loss": 0.43093061447143555 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1856275349855423, "epoch": 9.76, "learning_rate": 1.1918850380388842e-06, "loss": 0.3619, "step": 11548, "task_loss": 0.7085734605789185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3406912684440613, "epoch": 9.76, "learning_rate": 1.1876584953508031e-06, "loss": 0.3242, "step": 11549, "task_loss": 0.522082507610321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.332335889339447, "epoch": 9.76, "learning_rate": 1.183431952662722e-06, "loss": 0.3338, "step": 11550, "task_loss": 0.4854016900062561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22885192930698395, "epoch": 9.76, "learning_rate": 1.1792054099746407e-06, "loss": 0.305, "step": 11551, "task_loss": 0.6380324363708496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3433436453342438, "epoch": 9.76, "learning_rate": 1.1749788672865596e-06, "loss": 0.3856, "step": 11552, "task_loss": 0.5762419700622559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3392272889614105, "epoch": 9.77, "learning_rate": 1.1707523245984785e-06, "loss": 0.3263, "step": 11553, "task_loss": 0.38463836908340454 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34654638171195984, "epoch": 9.77, "learning_rate": 1.1665257819103974e-06, "loss": 0.3287, "step": 11554, "task_loss": 0.44732412695884705 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6483772397041321, "epoch": 9.77, "learning_rate": 1.162299239222316e-06, "loss": 0.4443, "step": 11555, "task_loss": 0.9998729228973389 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5327861905097961, "epoch": 9.77, "learning_rate": 1.1580726965342351e-06, "loss": 0.3523, "step": 11556, "task_loss": 0.38029369711875916 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31533241271972656, "epoch": 9.77, "learning_rate": 1.153846153846154e-06, "loss": 0.3954, "step": 11557, "task_loss": 0.6218183636665344 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23144280910491943, "epoch": 9.77, "learning_rate": 1.1496196111580727e-06, "loss": 0.3568, "step": 11558, "task_loss": 0.5519044995307922 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2794681489467621, "epoch": 9.77, "learning_rate": 1.1453930684699916e-06, "loss": 0.4217, "step": 11559, "task_loss": 0.5025290846824646 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19840696454048157, "epoch": 9.77, "learning_rate": 1.1411665257819105e-06, "loss": 0.2872, "step": 11560, "task_loss": 0.402765154838562 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26462483406066895, "epoch": 9.77, "learning_rate": 1.1369399830938294e-06, "loss": 0.37, "step": 11561, "task_loss": 0.3709114193916321 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4361591935157776, "epoch": 9.77, "learning_rate": 1.132713440405748e-06, "loss": 0.4948, "step": 11562, "task_loss": 0.7134681344032288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4421406686306, "epoch": 9.77, "learning_rate": 1.128486897717667e-06, "loss": 0.3297, "step": 11563, "task_loss": 0.9490401744842529 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35591650009155273, "epoch": 9.77, "learning_rate": 1.1242603550295859e-06, "loss": 0.361, "step": 11564, "task_loss": 0.9820961952209473 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32459041476249695, "epoch": 9.78, "learning_rate": 1.1200338123415047e-06, "loss": 0.3816, "step": 11565, "task_loss": 0.3405025899410248 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2934322655200958, "epoch": 9.78, "learning_rate": 1.1158072696534234e-06, "loss": 0.3137, "step": 11566, "task_loss": 0.9034987688064575 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36788052320480347, "epoch": 9.78, "learning_rate": 1.1115807269653423e-06, "loss": 0.2951, "step": 11567, "task_loss": 0.20638689398765564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3218073844909668, "epoch": 9.78, "learning_rate": 1.1073541842772612e-06, "loss": 0.299, "step": 11568, "task_loss": 0.36746034026145935 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5426002740859985, "epoch": 9.78, "learning_rate": 1.10312764158918e-06, "loss": 0.385, "step": 11569, "task_loss": 0.9694886207580566 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3522389233112335, "epoch": 9.78, "learning_rate": 1.098901098901099e-06, "loss": 0.4498, "step": 11570, "task_loss": 1.200997233390808 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.304076611995697, "epoch": 9.78, "learning_rate": 1.0946745562130179e-06, "loss": 0.3211, "step": 11571, "task_loss": 0.7263556122779846 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4278223216533661, "epoch": 9.78, "learning_rate": 1.0904480135249368e-06, "loss": 0.3513, "step": 11572, "task_loss": 0.6221022009849548 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.266663134098053, "epoch": 9.78, "learning_rate": 1.0862214708368555e-06, "loss": 0.3011, "step": 11573, "task_loss": 0.7180951833724976 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2807101905345917, "epoch": 9.78, "learning_rate": 1.0819949281487743e-06, "loss": 0.3923, "step": 11574, "task_loss": 0.8153069019317627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3259927034378052, "epoch": 9.78, "learning_rate": 1.0777683854606932e-06, "loss": 0.3119, "step": 11575, "task_loss": 0.6077529191970825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3847094178199768, "epoch": 9.78, "learning_rate": 1.0735418427726121e-06, "loss": 0.3551, "step": 11576, "task_loss": 1.1541520357131958 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.41597694158554077, "epoch": 9.79, "learning_rate": 1.0693153000845308e-06, "loss": 0.3672, "step": 11577, "task_loss": 0.5734269618988037 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2670394480228424, "epoch": 9.79, "learning_rate": 1.0650887573964497e-06, "loss": 0.3061, "step": 11578, "task_loss": 0.5403621196746826 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3795333504676819, "epoch": 9.79, "learning_rate": 1.0608622147083686e-06, "loss": 0.3599, "step": 11579, "task_loss": 0.6898123025894165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4665977954864502, "epoch": 9.79, "learning_rate": 1.0566356720202875e-06, "loss": 0.4495, "step": 11580, "task_loss": 0.9498779773712158 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3978946805000305, "epoch": 9.79, "learning_rate": 1.0524091293322062e-06, "loss": 0.4799, "step": 11581, "task_loss": 0.9153242707252502 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2683635354042053, "epoch": 9.79, "learning_rate": 1.048182586644125e-06, "loss": 0.3447, "step": 11582, "task_loss": 0.37186333537101746 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21465590596199036, "epoch": 9.79, "learning_rate": 1.043956043956044e-06, "loss": 0.3359, "step": 11583, "task_loss": 0.06667748838663101 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27030330896377563, "epoch": 9.79, "learning_rate": 1.0397295012679628e-06, "loss": 0.351, "step": 11584, "task_loss": 0.7504514455795288 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18166187405586243, "epoch": 9.79, "learning_rate": 1.0355029585798817e-06, "loss": 0.2479, "step": 11585, "task_loss": 0.399221807718277 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3939642906188965, "epoch": 9.79, "learning_rate": 1.0312764158918006e-06, "loss": 0.4348, "step": 11586, "task_loss": 0.229294091463089 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3243493437767029, "epoch": 9.79, "learning_rate": 1.0270498732037195e-06, "loss": 0.3211, "step": 11587, "task_loss": 1.0573574304580688 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.45112287998199463, "epoch": 9.79, "learning_rate": 1.0228233305156384e-06, "loss": 0.4166, "step": 11588, "task_loss": 0.5295839309692383 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33278900384902954, "epoch": 9.8, "learning_rate": 1.018596787827557e-06, "loss": 0.2867, "step": 11589, "task_loss": 0.3663014769554138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24121826887130737, "epoch": 9.8, "learning_rate": 1.014370245139476e-06, "loss": 0.3537, "step": 11590, "task_loss": 0.5832351446151733 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3033680319786072, "epoch": 9.8, "learning_rate": 1.0101437024513949e-06, "loss": 0.4487, "step": 11591, "task_loss": 0.6016051769256592 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3556279242038727, "epoch": 9.8, "learning_rate": 1.0059171597633138e-06, "loss": 0.3237, "step": 11592, "task_loss": 0.15073081851005554 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26481083035469055, "epoch": 9.8, "learning_rate": 1.0016906170752324e-06, "loss": 0.3327, "step": 11593, "task_loss": 0.04011211916804314 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2339170128107071, "epoch": 9.8, "learning_rate": 9.974640743871513e-07, "loss": 0.3434, "step": 11594, "task_loss": 0.21927061676979065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2987024486064911, "epoch": 9.8, "learning_rate": 9.932375316990702e-07, "loss": 0.3275, "step": 11595, "task_loss": 0.16293509304523468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3047736883163452, "epoch": 9.8, "learning_rate": 9.890109890109891e-07, "loss": 0.3202, "step": 11596, "task_loss": 0.8584185242652893 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25748249888420105, "epoch": 9.8, "learning_rate": 9.847844463229078e-07, "loss": 0.3336, "step": 11597, "task_loss": 0.6500364542007446 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24285843968391418, "epoch": 9.8, "learning_rate": 9.805579036348267e-07, "loss": 0.4336, "step": 11598, "task_loss": 0.41308924555778503 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2419148087501526, "epoch": 9.8, "learning_rate": 9.763313609467456e-07, "loss": 0.3728, "step": 11599, "task_loss": 0.12282896041870117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29451170563697815, "epoch": 9.81, "learning_rate": 9.721048182586645e-07, "loss": 0.3962, "step": 11600, "task_loss": 0.2076481133699417 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4167931079864502, "epoch": 9.81, "learning_rate": 9.678782755705834e-07, "loss": 0.4279, "step": 11601, "task_loss": 1.2686623334884644 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38243359327316284, "epoch": 9.81, "learning_rate": 9.636517328825023e-07, "loss": 0.5271, "step": 11602, "task_loss": 0.05235043913125992 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2847591042518616, "epoch": 9.81, "learning_rate": 9.594251901944211e-07, "loss": 0.3144, "step": 11603, "task_loss": 0.4899735152721405 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2498854100704193, "epoch": 9.81, "learning_rate": 9.551986475063398e-07, "loss": 0.3734, "step": 11604, "task_loss": 0.7233586311340332 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2272118628025055, "epoch": 9.81, "learning_rate": 9.509721048182587e-07, "loss": 0.3196, "step": 11605, "task_loss": 0.6324823498725891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24383017420768738, "epoch": 9.81, "learning_rate": 9.467455621301776e-07, "loss": 0.3047, "step": 11606, "task_loss": 0.14181889593601227 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.282803475856781, "epoch": 9.81, "learning_rate": 9.425190194420964e-07, "loss": 0.3255, "step": 11607, "task_loss": 0.10189807415008545 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2623617649078369, "epoch": 9.81, "learning_rate": 9.382924767540153e-07, "loss": 0.4392, "step": 11608, "task_loss": 0.6769603490829468 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24189922213554382, "epoch": 9.81, "learning_rate": 9.340659340659341e-07, "loss": 0.3746, "step": 11609, "task_loss": 0.07162756472826004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2872876524925232, "epoch": 9.81, "learning_rate": 9.29839391377853e-07, "loss": 0.2901, "step": 11610, "task_loss": 0.6414722800254822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4856971800327301, "epoch": 9.81, "learning_rate": 9.256128486897717e-07, "loss": 0.3248, "step": 11611, "task_loss": 0.81694495677948 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31776583194732666, "epoch": 9.82, "learning_rate": 9.213863060016906e-07, "loss": 0.3435, "step": 11612, "task_loss": 0.27536746859550476 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3352828621864319, "epoch": 9.82, "learning_rate": 9.171597633136094e-07, "loss": 0.3293, "step": 11613, "task_loss": 0.23912334442138672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3049991726875305, "epoch": 9.82, "learning_rate": 9.129332206255283e-07, "loss": 0.4625, "step": 11614, "task_loss": 0.8003309369087219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32282668352127075, "epoch": 9.82, "learning_rate": 9.087066779374473e-07, "loss": 0.3723, "step": 11615, "task_loss": 0.7033539414405823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2525600790977478, "epoch": 9.82, "learning_rate": 9.044801352493661e-07, "loss": 0.3721, "step": 11616, "task_loss": 0.699455738067627 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25242024660110474, "epoch": 9.82, "learning_rate": 9.00253592561285e-07, "loss": 0.2967, "step": 11617, "task_loss": 0.2816600501537323 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.44912272691726685, "epoch": 9.82, "learning_rate": 8.960270498732038e-07, "loss": 0.3173, "step": 11618, "task_loss": 1.7904939651489258 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.31990697979927063, "epoch": 9.82, "learning_rate": 8.918005071851227e-07, "loss": 0.2656, "step": 11619, "task_loss": 0.5626444220542908 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.589996874332428, "epoch": 9.82, "learning_rate": 8.875739644970415e-07, "loss": 0.4112, "step": 11620, "task_loss": 1.1437616348266602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3013782799243927, "epoch": 9.82, "learning_rate": 8.833474218089603e-07, "loss": 0.3393, "step": 11621, "task_loss": 0.47835880517959595 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14221367239952087, "epoch": 9.82, "learning_rate": 8.791208791208791e-07, "loss": 0.2425, "step": 11622, "task_loss": 0.18015243113040924 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14566174149513245, "epoch": 9.82, "learning_rate": 8.74894336432798e-07, "loss": 0.2978, "step": 11623, "task_loss": 0.2991926074028015 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5476205348968506, "epoch": 9.83, "learning_rate": 8.706677937447168e-07, "loss": 0.4761, "step": 11624, "task_loss": 1.2383973598480225 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22270378470420837, "epoch": 9.83, "learning_rate": 8.664412510566357e-07, "loss": 0.2882, "step": 11625, "task_loss": 0.28122350573539734 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24863888323307037, "epoch": 9.83, "learning_rate": 8.622147083685545e-07, "loss": 0.3772, "step": 11626, "task_loss": 0.16144612431526184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4593784809112549, "epoch": 9.83, "learning_rate": 8.579881656804734e-07, "loss": 0.3718, "step": 11627, "task_loss": 0.8021537065505981 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36984097957611084, "epoch": 9.83, "learning_rate": 8.537616229923922e-07, "loss": 0.4044, "step": 11628, "task_loss": 1.0335593223571777 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49677038192749023, "epoch": 9.83, "learning_rate": 8.495350803043111e-07, "loss": 0.3859, "step": 11629, "task_loss": 0.7184372544288635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34113413095474243, "epoch": 9.83, "learning_rate": 8.4530853761623e-07, "loss": 0.402, "step": 11630, "task_loss": 0.2766909599304199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.14213956892490387, "epoch": 9.83, "learning_rate": 8.410819949281488e-07, "loss": 0.3076, "step": 11631, "task_loss": 0.10918843746185303 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3124306797981262, "epoch": 9.83, "learning_rate": 8.368554522400677e-07, "loss": 0.4503, "step": 11632, "task_loss": 0.4812248945236206 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34216147661209106, "epoch": 9.83, "learning_rate": 8.326289095519865e-07, "loss": 0.3075, "step": 11633, "task_loss": 0.4338526427745819 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4333937168121338, "epoch": 9.83, "learning_rate": 8.284023668639054e-07, "loss": 0.3339, "step": 11634, "task_loss": 0.45242375135421753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19510699808597565, "epoch": 9.83, "learning_rate": 8.241758241758242e-07, "loss": 0.3675, "step": 11635, "task_loss": 0.28190428018569946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3510788679122925, "epoch": 9.84, "learning_rate": 8.199492814877431e-07, "loss": 0.4133, "step": 11636, "task_loss": 0.6353777647018433 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36686286330223083, "epoch": 9.84, "learning_rate": 8.157227387996619e-07, "loss": 0.3044, "step": 11637, "task_loss": 0.9839715957641602 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38448283076286316, "epoch": 9.84, "learning_rate": 8.114961961115808e-07, "loss": 0.3413, "step": 11638, "task_loss": 0.8712025284767151 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24727946519851685, "epoch": 9.84, "learning_rate": 8.072696534234995e-07, "loss": 0.2839, "step": 11639, "task_loss": 0.13456924259662628 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.316354364156723, "epoch": 9.84, "learning_rate": 8.030431107354184e-07, "loss": 0.2796, "step": 11640, "task_loss": 1.0290857553482056 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2354334592819214, "epoch": 9.84, "learning_rate": 7.988165680473372e-07, "loss": 0.385, "step": 11641, "task_loss": 0.41232091188430786 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32979440689086914, "epoch": 9.84, "learning_rate": 7.945900253592561e-07, "loss": 0.3401, "step": 11642, "task_loss": 0.4732211232185364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30502140522003174, "epoch": 9.84, "learning_rate": 7.903634826711749e-07, "loss": 0.3362, "step": 11643, "task_loss": 0.3674145042896271 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28016650676727295, "epoch": 9.84, "learning_rate": 7.861369399830938e-07, "loss": 0.3047, "step": 11644, "task_loss": 0.2455063909292221 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5164601802825928, "epoch": 9.84, "learning_rate": 7.819103972950128e-07, "loss": 0.377, "step": 11645, "task_loss": 0.833278238773346 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3164857029914856, "epoch": 9.84, "learning_rate": 7.776838546069316e-07, "loss": 0.4618, "step": 11646, "task_loss": 1.531575083732605 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2939262390136719, "epoch": 9.84, "learning_rate": 7.734573119188504e-07, "loss": 0.3897, "step": 11647, "task_loss": 0.813907265663147 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3977244198322296, "epoch": 9.85, "learning_rate": 7.692307692307694e-07, "loss": 0.2994, "step": 11648, "task_loss": 0.45476028323173523 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.293307363986969, "epoch": 9.85, "learning_rate": 7.650042265426881e-07, "loss": 0.3045, "step": 11649, "task_loss": 0.3076777458190918 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.470998078584671, "epoch": 9.85, "learning_rate": 7.60777683854607e-07, "loss": 0.3801, "step": 11650, "task_loss": 0.5332295894622803 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3325941860675812, "epoch": 9.85, "learning_rate": 7.565511411665258e-07, "loss": 0.3193, "step": 11651, "task_loss": 0.42220330238342285 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2850295305252075, "epoch": 9.85, "learning_rate": 7.523245984784447e-07, "loss": 0.3157, "step": 11652, "task_loss": 0.8529147505760193 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34869807958602905, "epoch": 9.85, "learning_rate": 7.480980557903635e-07, "loss": 0.4398, "step": 11653, "task_loss": 0.6998187899589539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27215325832366943, "epoch": 9.85, "learning_rate": 7.438715131022824e-07, "loss": 0.2025, "step": 11654, "task_loss": 1.2488270998001099 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33452659845352173, "epoch": 9.85, "learning_rate": 7.396449704142012e-07, "loss": 0.3508, "step": 11655, "task_loss": 0.511022686958313 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4641001522541046, "epoch": 9.85, "learning_rate": 7.354184277261201e-07, "loss": 0.3915, "step": 11656, "task_loss": 0.6546111106872559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2676555812358856, "epoch": 9.85, "learning_rate": 7.31191885038039e-07, "loss": 0.3914, "step": 11657, "task_loss": 0.044949520379304886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5977452397346497, "epoch": 9.85, "learning_rate": 7.269653423499577e-07, "loss": 0.4101, "step": 11658, "task_loss": 1.1131128072738647 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23896104097366333, "epoch": 9.85, "learning_rate": 7.227387996618766e-07, "loss": 0.4033, "step": 11659, "task_loss": 0.2637478709220886 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2893185317516327, "epoch": 9.86, "learning_rate": 7.185122569737954e-07, "loss": 0.4162, "step": 11660, "task_loss": 0.9951228499412537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35025647282600403, "epoch": 9.86, "learning_rate": 7.142857142857143e-07, "loss": 0.2246, "step": 11661, "task_loss": 0.5603922605514526 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37135398387908936, "epoch": 9.86, "learning_rate": 7.100591715976331e-07, "loss": 0.2985, "step": 11662, "task_loss": 0.5651071667671204 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30270451307296753, "epoch": 9.86, "learning_rate": 7.058326289095521e-07, "loss": 0.3694, "step": 11663, "task_loss": 0.9176321029663086 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3636479377746582, "epoch": 9.86, "learning_rate": 7.016060862214709e-07, "loss": 0.3244, "step": 11664, "task_loss": 0.3930325508117676 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3956620693206787, "epoch": 9.86, "learning_rate": 6.973795435333898e-07, "loss": 0.4029, "step": 11665, "task_loss": 0.3240499496459961 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5820457935333252, "epoch": 9.86, "learning_rate": 6.931530008453086e-07, "loss": 0.545, "step": 11666, "task_loss": 1.336995244026184 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27669885754585266, "epoch": 9.86, "learning_rate": 6.889264581572275e-07, "loss": 0.2479, "step": 11667, "task_loss": 0.4336546063423157 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43685275316238403, "epoch": 9.86, "learning_rate": 6.846999154691462e-07, "loss": 0.4214, "step": 11668, "task_loss": 1.184675931930542 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.48736968636512756, "epoch": 9.86, "learning_rate": 6.804733727810651e-07, "loss": 0.3439, "step": 11669, "task_loss": 1.307713508605957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.7637746334075928, "epoch": 9.86, "learning_rate": 6.762468300929839e-07, "loss": 0.496, "step": 11670, "task_loss": 0.8066900372505188 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29835572838783264, "epoch": 9.87, "learning_rate": 6.720202874049028e-07, "loss": 0.3217, "step": 11671, "task_loss": 0.35983768105506897 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.21950377523899078, "epoch": 9.87, "learning_rate": 6.677937447168217e-07, "loss": 0.3258, "step": 11672, "task_loss": 0.07857642322778702 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32372206449508667, "epoch": 9.87, "learning_rate": 6.635672020287405e-07, "loss": 0.2994, "step": 11673, "task_loss": 0.3583196699619293 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30379703640937805, "epoch": 9.87, "learning_rate": 6.593406593406594e-07, "loss": 0.382, "step": 11674, "task_loss": 1.4303418397903442 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4729366898536682, "epoch": 9.87, "learning_rate": 6.551141166525782e-07, "loss": 0.3884, "step": 11675, "task_loss": 1.2082672119140625 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3671010136604309, "epoch": 9.87, "learning_rate": 6.50887573964497e-07, "loss": 0.3819, "step": 11676, "task_loss": 1.9791260957717896 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1895056962966919, "epoch": 9.87, "learning_rate": 6.466610312764158e-07, "loss": 0.3216, "step": 11677, "task_loss": 0.01748405396938324 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.16998028755187988, "epoch": 9.87, "learning_rate": 6.424344885883348e-07, "loss": 0.2553, "step": 11678, "task_loss": 0.24657247960567474 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23196865618228912, "epoch": 9.87, "learning_rate": 6.382079459002536e-07, "loss": 0.2172, "step": 11679, "task_loss": 0.12969322502613068 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42950764298439026, "epoch": 9.87, "learning_rate": 6.339814032121725e-07, "loss": 0.5153, "step": 11680, "task_loss": 0.4721524119377136 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2525715231895447, "epoch": 9.87, "learning_rate": 6.297548605240913e-07, "loss": 0.3088, "step": 11681, "task_loss": 0.16983474791049957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37924784421920776, "epoch": 9.87, "learning_rate": 6.255283178360102e-07, "loss": 0.3107, "step": 11682, "task_loss": 0.2876981496810913 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1962471902370453, "epoch": 9.88, "learning_rate": 6.21301775147929e-07, "loss": 0.2913, "step": 11683, "task_loss": 0.7153844237327576 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37121933698654175, "epoch": 9.88, "learning_rate": 6.170752324598479e-07, "loss": 0.349, "step": 11684, "task_loss": 0.7558860778808594 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26068949699401855, "epoch": 9.88, "learning_rate": 6.128486897717667e-07, "loss": 0.3951, "step": 11685, "task_loss": 0.35780027508735657 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29396599531173706, "epoch": 9.88, "learning_rate": 6.086221470836857e-07, "loss": 0.3015, "step": 11686, "task_loss": 0.35192227363586426 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23912331461906433, "epoch": 9.88, "learning_rate": 6.043956043956044e-07, "loss": 0.3171, "step": 11687, "task_loss": 0.19755369424819946 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3885962963104248, "epoch": 9.88, "learning_rate": 6.001690617075233e-07, "loss": 0.4163, "step": 11688, "task_loss": 1.0168933868408203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23741543292999268, "epoch": 9.88, "learning_rate": 5.959425190194421e-07, "loss": 0.3953, "step": 11689, "task_loss": 0.17260943353176117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28203392028808594, "epoch": 9.88, "learning_rate": 5.91715976331361e-07, "loss": 0.3141, "step": 11690, "task_loss": 0.1484801173210144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23727358877658844, "epoch": 9.88, "learning_rate": 5.874894336432798e-07, "loss": 0.278, "step": 11691, "task_loss": 0.1387133151292801 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19000782072544098, "epoch": 9.88, "learning_rate": 5.832628909551987e-07, "loss": 0.3064, "step": 11692, "task_loss": 0.5097707509994507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39126887917518616, "epoch": 9.88, "learning_rate": 5.790363482671176e-07, "loss": 0.2995, "step": 11693, "task_loss": 0.4464239478111267 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2658774256706238, "epoch": 9.88, "learning_rate": 5.748098055790364e-07, "loss": 0.343, "step": 11694, "task_loss": 0.34079092741012573 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2129579782485962, "epoch": 9.89, "learning_rate": 5.705832628909553e-07, "loss": 0.3307, "step": 11695, "task_loss": 0.29643699526786804 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3026198148727417, "epoch": 9.89, "learning_rate": 5.66356720202874e-07, "loss": 0.3112, "step": 11696, "task_loss": 0.20042301714420319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22726619243621826, "epoch": 9.89, "learning_rate": 5.621301775147929e-07, "loss": 0.3357, "step": 11697, "task_loss": 0.12034071981906891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25201886892318726, "epoch": 9.89, "learning_rate": 5.579036348267117e-07, "loss": 0.3915, "step": 11698, "task_loss": 0.8884305357933044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3317891061306, "epoch": 9.89, "learning_rate": 5.536770921386306e-07, "loss": 0.3472, "step": 11699, "task_loss": 0.19472239911556244 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.13104330003261566, "epoch": 9.89, "learning_rate": 5.494505494505495e-07, "loss": 0.2407, "step": 11700, "task_loss": 0.30573517084121704 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4251641631126404, "epoch": 9.89, "learning_rate": 5.452240067624684e-07, "loss": 0.3792, "step": 11701, "task_loss": 2.264606237411499 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2899189889431, "epoch": 9.89, "learning_rate": 5.409974640743872e-07, "loss": 0.3221, "step": 11702, "task_loss": 0.7569632530212402 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32380542159080505, "epoch": 9.89, "learning_rate": 5.367709213863061e-07, "loss": 0.3383, "step": 11703, "task_loss": 0.39251238107681274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3275623917579651, "epoch": 9.89, "learning_rate": 5.325443786982249e-07, "loss": 0.392, "step": 11704, "task_loss": 0.8659747242927551 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3574153482913971, "epoch": 9.89, "learning_rate": 5.283178360101437e-07, "loss": 0.4121, "step": 11705, "task_loss": 1.2058131694793701 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26528435945510864, "epoch": 9.89, "learning_rate": 5.240912933220625e-07, "loss": 0.3632, "step": 11706, "task_loss": 0.23518306016921997 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3148086667060852, "epoch": 9.9, "learning_rate": 5.198647506339814e-07, "loss": 0.3218, "step": 11707, "task_loss": 0.9215880036354065 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2723386287689209, "epoch": 9.9, "learning_rate": 5.156382079459003e-07, "loss": 0.318, "step": 11708, "task_loss": 1.4038259983062744 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4142744243144989, "epoch": 9.9, "learning_rate": 5.114116652578192e-07, "loss": 0.3177, "step": 11709, "task_loss": 0.6680419445037842 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4938109517097473, "epoch": 9.9, "learning_rate": 5.07185122569738e-07, "loss": 0.375, "step": 11710, "task_loss": 1.2160124778747559 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4601837992668152, "epoch": 9.9, "learning_rate": 5.029585798816569e-07, "loss": 0.3874, "step": 11711, "task_loss": 0.6291356086730957 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1953631043434143, "epoch": 9.9, "learning_rate": 4.987320371935757e-07, "loss": 0.2847, "step": 11712, "task_loss": 0.17378218472003937 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3112950325012207, "epoch": 9.9, "learning_rate": 4.945054945054946e-07, "loss": 0.4124, "step": 11713, "task_loss": 0.7834708094596863 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5255270600318909, "epoch": 9.9, "learning_rate": 4.902789518174133e-07, "loss": 0.3007, "step": 11714, "task_loss": 0.9972193241119385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4606904685497284, "epoch": 9.9, "learning_rate": 4.860524091293322e-07, "loss": 0.3886, "step": 11715, "task_loss": 0.7609273195266724 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26381057500839233, "epoch": 9.9, "learning_rate": 4.818258664412511e-07, "loss": 0.3228, "step": 11716, "task_loss": 0.6803664565086365 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42576780915260315, "epoch": 9.9, "learning_rate": 4.775993237531699e-07, "loss": 0.5524, "step": 11717, "task_loss": 0.9163270592689514 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.538731038570404, "epoch": 9.9, "learning_rate": 4.733727810650888e-07, "loss": 0.3756, "step": 11718, "task_loss": 0.8445892930030823 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5840818285942078, "epoch": 9.91, "learning_rate": 4.6914623837700764e-07, "loss": 0.3939, "step": 11719, "task_loss": 0.45174503326416016 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4116341173648834, "epoch": 9.91, "learning_rate": 4.649196956889265e-07, "loss": 0.3107, "step": 11720, "task_loss": 0.8853473663330078 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25085964798927307, "epoch": 9.91, "learning_rate": 4.606931530008453e-07, "loss": 0.3378, "step": 11721, "task_loss": 1.6889536380767822 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2072075754404068, "epoch": 9.91, "learning_rate": 4.5646661031276416e-07, "loss": 0.3158, "step": 11722, "task_loss": 0.4609294533729553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1892644613981247, "epoch": 9.91, "learning_rate": 4.5224006762468305e-07, "loss": 0.3881, "step": 11723, "task_loss": 0.45453980565071106 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.681640088558197, "epoch": 9.91, "learning_rate": 4.480135249366019e-07, "loss": 0.4691, "step": 11724, "task_loss": 1.1834304332733154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3957146406173706, "epoch": 9.91, "learning_rate": 4.4378698224852073e-07, "loss": 0.3689, "step": 11725, "task_loss": 0.0429813526570797 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4019812345504761, "epoch": 9.91, "learning_rate": 4.3956043956043957e-07, "loss": 0.3422, "step": 11726, "task_loss": 0.31533750891685486 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24700915813446045, "epoch": 9.91, "learning_rate": 4.353338968723584e-07, "loss": 0.313, "step": 11727, "task_loss": 0.2257741540670395 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18115797638893127, "epoch": 9.91, "learning_rate": 4.3110735418427724e-07, "loss": 0.2315, "step": 11728, "task_loss": 0.8204843401908875 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3415915071964264, "epoch": 9.91, "learning_rate": 4.268808114961961e-07, "loss": 0.4687, "step": 11729, "task_loss": 0.8845313787460327 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.27753379940986633, "epoch": 9.91, "learning_rate": 4.22654268808115e-07, "loss": 0.3118, "step": 11730, "task_loss": 0.5955291390419006 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.260311484336853, "epoch": 9.92, "learning_rate": 4.1842772612003386e-07, "loss": 0.3838, "step": 11731, "task_loss": 0.7572807669639587 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5177025198936462, "epoch": 9.92, "learning_rate": 4.142011834319527e-07, "loss": 0.3524, "step": 11732, "task_loss": 1.8879550695419312 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4819498062133789, "epoch": 9.92, "learning_rate": 4.0997464074387154e-07, "loss": 0.3571, "step": 11733, "task_loss": 0.8797711133956909 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39422139525413513, "epoch": 9.92, "learning_rate": 4.057480980557904e-07, "loss": 0.3882, "step": 11734, "task_loss": 0.594251275062561 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4268626570701599, "epoch": 9.92, "learning_rate": 4.015215553677092e-07, "loss": 0.3849, "step": 11735, "task_loss": 0.7167497873306274 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.26411309838294983, "epoch": 9.92, "learning_rate": 3.9729501267962806e-07, "loss": 0.3858, "step": 11736, "task_loss": 0.8943134546279907 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4469754993915558, "epoch": 9.92, "learning_rate": 3.930684699915469e-07, "loss": 0.3523, "step": 11737, "task_loss": 0.7392475008964539 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38543856143951416, "epoch": 9.92, "learning_rate": 3.888419273034658e-07, "loss": 0.3296, "step": 11738, "task_loss": 0.5616697072982788 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25632214546203613, "epoch": 9.92, "learning_rate": 3.846153846153847e-07, "loss": 0.4191, "step": 11739, "task_loss": 0.6170924305915833 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35441380739212036, "epoch": 9.92, "learning_rate": 3.803888419273035e-07, "loss": 0.4002, "step": 11740, "task_loss": 0.7470883727073669 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33617663383483887, "epoch": 9.92, "learning_rate": 3.7616229923922236e-07, "loss": 0.4149, "step": 11741, "task_loss": 0.5135720372200012 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4026612937450409, "epoch": 9.93, "learning_rate": 3.719357565511412e-07, "loss": 0.3361, "step": 11742, "task_loss": 0.6488247513771057 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5324593782424927, "epoch": 9.93, "learning_rate": 3.6770921386306003e-07, "loss": 0.3984, "step": 11743, "task_loss": 0.9223433136940002 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5193485021591187, "epoch": 9.93, "learning_rate": 3.6348267117497887e-07, "loss": 0.393, "step": 11744, "task_loss": 0.49762797355651855 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5189992189407349, "epoch": 9.93, "learning_rate": 3.592561284868977e-07, "loss": 0.4286, "step": 11745, "task_loss": 0.8544949889183044 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47492849826812744, "epoch": 9.93, "learning_rate": 3.5502958579881655e-07, "loss": 0.4421, "step": 11746, "task_loss": 1.631609320640564 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5366489291191101, "epoch": 9.93, "learning_rate": 3.5080304311073544e-07, "loss": 0.3366, "step": 11747, "task_loss": 0.6337089538574219 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3371276259422302, "epoch": 9.93, "learning_rate": 3.465765004226543e-07, "loss": 0.2871, "step": 11748, "task_loss": 0.32683488726615906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47234320640563965, "epoch": 9.93, "learning_rate": 3.423499577345731e-07, "loss": 0.3862, "step": 11749, "task_loss": 1.876320242881775 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32776206731796265, "epoch": 9.93, "learning_rate": 3.3812341504649196e-07, "loss": 0.5036, "step": 11750, "task_loss": 1.546988606452942 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3000595271587372, "epoch": 9.93, "learning_rate": 3.3389687235841085e-07, "loss": 0.2902, "step": 11751, "task_loss": 0.8016992211341858 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3162441849708557, "epoch": 9.93, "learning_rate": 3.296703296703297e-07, "loss": 0.3835, "step": 11752, "task_loss": 0.3264237940311432 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19355684518814087, "epoch": 9.93, "learning_rate": 3.254437869822485e-07, "loss": 0.2912, "step": 11753, "task_loss": 0.45622894167900085 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22515863180160522, "epoch": 9.94, "learning_rate": 3.212172442941674e-07, "loss": 0.3743, "step": 11754, "task_loss": 0.43019795417785645 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.39934051036834717, "epoch": 9.94, "learning_rate": 3.1699070160608626e-07, "loss": 0.3113, "step": 11755, "task_loss": 0.30255988240242004 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.42757365107536316, "epoch": 9.94, "learning_rate": 3.127641589180051e-07, "loss": 0.5953, "step": 11756, "task_loss": 0.791983962059021 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1849563866853714, "epoch": 9.94, "learning_rate": 3.0853761622992393e-07, "loss": 0.3166, "step": 11757, "task_loss": 0.12404951453208923 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3444669544696808, "epoch": 9.94, "learning_rate": 3.043110735418428e-07, "loss": 0.3114, "step": 11758, "task_loss": 0.12442126870155334 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2463337630033493, "epoch": 9.94, "learning_rate": 3.0008453085376166e-07, "loss": 0.2859, "step": 11759, "task_loss": 0.6186110973358154 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3415706157684326, "epoch": 9.94, "learning_rate": 2.958579881656805e-07, "loss": 0.3007, "step": 11760, "task_loss": 0.5110601186752319 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.37953025102615356, "epoch": 9.94, "learning_rate": 2.9163144547759934e-07, "loss": 0.3598, "step": 11761, "task_loss": 0.5044890642166138 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23354989290237427, "epoch": 9.94, "learning_rate": 2.874049027895182e-07, "loss": 0.4289, "step": 11762, "task_loss": 0.28337883949279785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22724463045597076, "epoch": 9.94, "learning_rate": 2.83178360101437e-07, "loss": 0.3825, "step": 11763, "task_loss": 0.4275471270084381 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18522074818611145, "epoch": 9.94, "learning_rate": 2.7895181741335586e-07, "loss": 0.3035, "step": 11764, "task_loss": 0.15577220916748047 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2865057587623596, "epoch": 9.94, "learning_rate": 2.7472527472527475e-07, "loss": 0.3349, "step": 11765, "task_loss": 0.5225082039833069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.38515332341194153, "epoch": 9.95, "learning_rate": 2.704987320371936e-07, "loss": 0.3316, "step": 11766, "task_loss": 0.30551522970199585 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4762231707572937, "epoch": 9.95, "learning_rate": 2.662721893491124e-07, "loss": 0.3445, "step": 11767, "task_loss": 0.38365766406059265 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25808727741241455, "epoch": 9.95, "learning_rate": 2.6204564666103126e-07, "loss": 0.351, "step": 11768, "task_loss": 0.6372522711753845 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.5280554294586182, "epoch": 9.95, "learning_rate": 2.5781910397295016e-07, "loss": 0.383, "step": 11769, "task_loss": 0.7633196711540222 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6621668338775635, "epoch": 9.95, "learning_rate": 2.53592561284869e-07, "loss": 0.4276, "step": 11770, "task_loss": 0.4530010223388672 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6073736548423767, "epoch": 9.95, "learning_rate": 2.4936601859678783e-07, "loss": 0.3712, "step": 11771, "task_loss": 0.6752600073814392 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28225088119506836, "epoch": 9.95, "learning_rate": 2.4513947590870667e-07, "loss": 0.3297, "step": 11772, "task_loss": 1.1286174058914185 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3454148769378662, "epoch": 9.95, "learning_rate": 2.4091293322062556e-07, "loss": 0.3399, "step": 11773, "task_loss": 0.12636178731918335 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3395894169807434, "epoch": 9.95, "learning_rate": 2.366863905325444e-07, "loss": 0.3018, "step": 11774, "task_loss": 0.11742892116308212 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2392624169588089, "epoch": 9.95, "learning_rate": 2.3245984784446324e-07, "loss": 0.4079, "step": 11775, "task_loss": 0.31141945719718933 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3980900049209595, "epoch": 9.95, "learning_rate": 2.2823330515638208e-07, "loss": 0.366, "step": 11776, "task_loss": 0.8269650340080261 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3302658200263977, "epoch": 9.95, "learning_rate": 2.2400676246830094e-07, "loss": 0.3754, "step": 11777, "task_loss": 0.7397379875183105 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3014085590839386, "epoch": 9.96, "learning_rate": 2.1978021978021978e-07, "loss": 0.3202, "step": 11778, "task_loss": 0.09280461072921753 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.49418768286705017, "epoch": 9.96, "learning_rate": 2.1555367709213862e-07, "loss": 0.3977, "step": 11779, "task_loss": 0.751481831073761 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.47034943103790283, "epoch": 9.96, "learning_rate": 2.113271344040575e-07, "loss": 0.4858, "step": 11780, "task_loss": 0.967939019203186 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2273130714893341, "epoch": 9.96, "learning_rate": 2.0710059171597635e-07, "loss": 0.2863, "step": 11781, "task_loss": 0.4272995591163635 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34906578063964844, "epoch": 9.96, "learning_rate": 2.028740490278952e-07, "loss": 0.3483, "step": 11782, "task_loss": 0.7774043679237366 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2934296727180481, "epoch": 9.96, "learning_rate": 1.9864750633981403e-07, "loss": 0.3631, "step": 11783, "task_loss": 0.3624795973300934 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.282190203666687, "epoch": 9.96, "learning_rate": 1.944209636517329e-07, "loss": 0.3786, "step": 11784, "task_loss": 0.32133761048316956 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.34569019079208374, "epoch": 9.96, "learning_rate": 1.9019442096365176e-07, "loss": 0.4441, "step": 11785, "task_loss": 0.4626517593860626 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32801681756973267, "epoch": 9.96, "learning_rate": 1.859678782755706e-07, "loss": 0.2539, "step": 11786, "task_loss": 0.06652214378118515 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.24997010827064514, "epoch": 9.96, "learning_rate": 1.8174133558748944e-07, "loss": 0.3825, "step": 11787, "task_loss": 0.08898423612117767 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.20445089042186737, "epoch": 9.96, "learning_rate": 1.7751479289940827e-07, "loss": 0.3181, "step": 11788, "task_loss": 0.5703615546226501 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.40065449476242065, "epoch": 9.96, "learning_rate": 1.7328825021132714e-07, "loss": 0.3168, "step": 11789, "task_loss": 0.40044814348220825 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.322534441947937, "epoch": 9.97, "learning_rate": 1.6906170752324598e-07, "loss": 0.3783, "step": 11790, "task_loss": 0.30410873889923096 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.17657209932804108, "epoch": 9.97, "learning_rate": 1.6483516483516484e-07, "loss": 0.2241, "step": 11791, "task_loss": 0.3425569534301758 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.25555211305618286, "epoch": 9.97, "learning_rate": 1.606086221470837e-07, "loss": 0.356, "step": 11792, "task_loss": 0.8189453482627869 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.35348495841026306, "epoch": 9.97, "learning_rate": 1.5638207945900255e-07, "loss": 0.3425, "step": 11793, "task_loss": 0.4430040419101715 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3643072843551636, "epoch": 9.97, "learning_rate": 1.521555367709214e-07, "loss": 0.2781, "step": 11794, "task_loss": 0.6984408497810364 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.33921003341674805, "epoch": 9.97, "learning_rate": 1.4792899408284025e-07, "loss": 0.4425, "step": 11795, "task_loss": 0.9837867617607117 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2672138214111328, "epoch": 9.97, "learning_rate": 1.437024513947591e-07, "loss": 0.3386, "step": 11796, "task_loss": 0.6427420973777771 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3433264493942261, "epoch": 9.97, "learning_rate": 1.3947590870667793e-07, "loss": 0.3824, "step": 11797, "task_loss": 0.26729127764701843 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2660945951938629, "epoch": 9.97, "learning_rate": 1.352493660185968e-07, "loss": 0.3078, "step": 11798, "task_loss": 0.5148373246192932 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.19694188237190247, "epoch": 9.97, "learning_rate": 1.3102282333051563e-07, "loss": 0.3864, "step": 11799, "task_loss": 0.6460482478141785 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.18113857507705688, "epoch": 9.97, "learning_rate": 1.267962806424345e-07, "loss": 0.2952, "step": 11800, "task_loss": 0.2964319884777069 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2834235429763794, "epoch": 9.97, "learning_rate": 1.2256973795435334e-07, "loss": 0.373, "step": 11801, "task_loss": 0.7258270382881165 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43097978830337524, "epoch": 9.98, "learning_rate": 1.183431952662722e-07, "loss": 0.2945, "step": 11802, "task_loss": 0.7247740626335144 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23921102285385132, "epoch": 9.98, "learning_rate": 1.1411665257819104e-07, "loss": 0.3422, "step": 11803, "task_loss": 0.47640129923820496 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3541773855686188, "epoch": 9.98, "learning_rate": 1.0989010989010989e-07, "loss": 0.3429, "step": 11804, "task_loss": 0.5667593479156494 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2955644130706787, "epoch": 9.98, "learning_rate": 1.0566356720202876e-07, "loss": 0.3195, "step": 11805, "task_loss": 0.11272616684436798 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4857930839061737, "epoch": 9.98, "learning_rate": 1.014370245139476e-07, "loss": 0.3202, "step": 11806, "task_loss": 0.30028092861175537 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3765583634376526, "epoch": 9.98, "learning_rate": 9.721048182586645e-08, "loss": 0.3557, "step": 11807, "task_loss": 0.46042531728744507 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2766381502151489, "epoch": 9.98, "learning_rate": 9.29839391377853e-08, "loss": 0.3689, "step": 11808, "task_loss": 0.3302498757839203 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.43337833881378174, "epoch": 9.98, "learning_rate": 8.875739644970414e-08, "loss": 0.3686, "step": 11809, "task_loss": 0.6017572283744812 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3352062702178955, "epoch": 9.98, "learning_rate": 8.453085376162299e-08, "loss": 0.5057, "step": 11810, "task_loss": 0.4864320158958435 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.346771776676178, "epoch": 9.98, "learning_rate": 8.030431107354185e-08, "loss": 0.3923, "step": 11811, "task_loss": 1.0557861328125 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2765885293483734, "epoch": 9.98, "learning_rate": 7.60777683854607e-08, "loss": 0.51, "step": 11812, "task_loss": 0.6310605406761169 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.28861021995544434, "epoch": 9.99, "learning_rate": 7.185122569737954e-08, "loss": 0.3857, "step": 11813, "task_loss": 0.6160259246826172 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.30933618545532227, "epoch": 9.99, "learning_rate": 6.76246830092984e-08, "loss": 0.2909, "step": 11814, "task_loss": 0.21384865045547485 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36904263496398926, "epoch": 9.99, "learning_rate": 6.339814032121725e-08, "loss": 0.4287, "step": 11815, "task_loss": 0.48181653022766113 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.6379642486572266, "epoch": 9.99, "learning_rate": 5.91715976331361e-08, "loss": 0.442, "step": 11816, "task_loss": 1.2292546033859253 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.416140615940094, "epoch": 9.99, "learning_rate": 5.4945054945054946e-08, "loss": 0.4102, "step": 11817, "task_loss": 0.4823341369628906 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3310478925704956, "epoch": 9.99, "learning_rate": 5.07185122569738e-08, "loss": 0.3042, "step": 11818, "task_loss": 0.41227760910987854 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.22006428241729736, "epoch": 9.99, "learning_rate": 4.649196956889265e-08, "loss": 0.3315, "step": 11819, "task_loss": 0.05721826106309891 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.1765422523021698, "epoch": 9.99, "learning_rate": 4.2265426880811495e-08, "loss": 0.3644, "step": 11820, "task_loss": 1.6966221332550049 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.36882930994033813, "epoch": 9.99, "learning_rate": 3.803888419273035e-08, "loss": 0.3452, "step": 11821, "task_loss": 0.7885497212409973 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.4522852897644043, "epoch": 9.99, "learning_rate": 3.38123415046492e-08, "loss": 0.3788, "step": 11822, "task_loss": 0.9584880471229553 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.29514628648757935, "epoch": 9.99, "learning_rate": 2.958579881656805e-08, "loss": 0.3308, "step": 11823, "task_loss": 0.020527873188257217 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3450312614440918, "epoch": 9.99, "learning_rate": 2.53592561284869e-08, "loss": 0.3556, "step": 11824, "task_loss": 0.8504543304443359 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2872273027896881, "epoch": 10.0, "learning_rate": 2.1132713440405747e-08, "loss": 0.3119, "step": 11825, "task_loss": 0.4323715269565582 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2442912757396698, "epoch": 10.0, "learning_rate": 1.69061707523246e-08, "loss": 0.3689, "step": 11826, "task_loss": 0.8550302982330322 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.32440152764320374, "epoch": 10.0, "learning_rate": 1.267962806424345e-08, "loss": 0.4003, "step": 11827, "task_loss": 0.4759567379951477 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.3857249617576599, "epoch": 10.0, "learning_rate": 8.4530853761623e-09, "loss": 0.405, "step": 11828, "task_loss": 0.8508530259132385 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.2739834785461426, "epoch": 10.0, "learning_rate": 4.22654268808115e-09, "loss": 0.285, "step": 11829, "task_loss": 0.9523541331291199 }, { "compression/movement_sparsity/importance_regularization_factor": 1.5, "compression/movement_sparsity/importance_threshold": 0.0, "compression/movement_sparsity/linear_layer_sparsity": 0.38010914810693824, "compression/movement_sparsity/model_sparsity": 0.3670512295169506, "compression_loss": 0.0, "distillation_loss": 0.23988965153694153, "epoch": 10.0, "learning_rate": 0.0, "loss": 0.3541, "step": 11830, "task_loss": 1.0655288696289062 }, { "epoch": 10.0, "step": 11830, "total_flos": 5.9664632082415714e+19, "train_loss": 12.959198828177591, "train_runtime": 51238.186, "train_samples_per_second": 14.784, "train_steps_per_second": 0.231 } ], "max_steps": 11830, "num_train_epochs": 10, "total_flos": 5.9664632082415714e+19, "trial_name": null, "trial_params": null }